RNA-seq report for patient sample CCR180038_SV18T002P006_RNA.

##### We attempt to structure the script in the following way:

# 1. Defining functions
# 2. Loading libraries
# 3. Loading sample data and reference datasets
# Then... code chunks involving data processing
# Then... code chunks calling the processed data to produce tables / plots / data summary
# Finish with Session info in Addendum section

##### The processed data is stored in "ref_datasets.list" list variable with elements holding the following data:

# 1. ref_datasets.list[[tissue]][["combined_data"]] = combined read count data (reference datasets + sample data) ("combineDatasets" function output in the "load_ref_data chunk")

# 2. ref_datasets.list[[tissue]][["sample_annot"]] = combined data samples annotation ("combineDatasets" function output in the "load_ref_data chunk")

# 3. ref_datasets.list[[tissue]][["combined_data_processed"]] = transformed, filtered and normalised data (see "data_transformation" and "data_normalisation" chunks)

# 4. ref_datasets.list[[tissue]][["pca"]] = PCA results

# 5. ref_datasets.list[[tissue]][["gene_annot_all"]] = gene annotation for combined read count data, containing all input genes. The annotation includes "SYMBOL", "GENEBIOTYPE", "ENSEMBL", "SEQNAME", "GENESEQSTART", "GENESEQEND". "ENSEMBL" is used for rownames

# 6. ref_datasets.list[[tissue]][["gene_annot"]] = gene annotation for transformed, filtered and normalised data. The annotation includes "SYMBOL", "GENEBIOTYPE", "ENSEMBL", "SEQNAME", "GENESEQSTART", "GENESEQEND". "SYMBOL" is used for rownames

# 7. ref_datasets.list[[tissue]][["expr_mut_cn_data"]] = combined expression, mutation and copy-number data
##### Define functions
##### Create 'not in' operator
"%!in%" <- function(x,table) match(x,table, nomatch = 0) == 0

##### Prepare object to write into a file
prepare2write <- function (x) {
  
  x2write <- cbind(rownames(x), x)
  colnames(x2write) <- c("Gene",colnames(x))
  return(x2write)
}

##### Combine sample expression profile with reference datasets. This function outputs a vector with first element containing the merged data and second element containing merged targets info
combineDatasets <- function(sample_name, sample_counts, ref_dataset) {
  
  ##### Read file with reference datasets information
  DatasetInput <- read.table(ref_dataset, sep="\t", as.is=TRUE, header=TRUE, row.names=1)
  
  ##### Extract info about target file for the first dataset
  fileInfo <- strsplit(DatasetInput[,"Target_file"], split='/', fixed=TRUE)
  targetFile <- read.table(DatasetInput[1,"Target_file"], sep="\t", as.is=TRUE, header=TRUE)[,c(1:4)]
  rownames(targetFile) <- targetFile[,"Sample_name"]
  targetFile <- cbind(targetFile[,2:4],rownames(DatasetInput[1,]))
  colnames(targetFile)[ncol(targetFile)] <- "Dataset"
  
  if ( nrow(DatasetInput) > 1 ) {
    for ( i in 2:nrow(DatasetInput) ) {
        
      ##### Create a temporary object to store info from the remaining target files
      targetFileTmp <- read.table(DatasetInput[i,"Target_file"], sep="\t", as.is=TRUE, header=TRUE)[,c(1:4)]
      rownames(targetFileTmp) <- targetFileTmp[,"Sample_name"]
      targetFileTmp <- cbind(targetFileTmp[,2:4],rownames(DatasetInput[i,]))
      colnames(targetFileTmp)[ncol(targetFileTmp)] <- "Dataset"
        
      targetFile <- rbind(targetFile, targetFileTmp)
    }
  }  
  
  ##### Add sample info
  sampleTargetFile <- data.frame(sample_counts, sample_name, NA, sample_name)
  names(sampleTargetFile) <- names(targetFile)
  rownames(sampleTargetFile) <- sample_name
  targetFile <- rbind( targetFile, sampleTargetFile )
  
  ##### Make syntactically valid names
  rownames(targetFile) <- make.names(rownames(targetFile))
  
  ##### Read sample read count file and combine it with reference datasets
  datasets.comb <- read.table(sample_counts, sep="\t", as.is=TRUE, header=FALSE, row.names=NULL)
  names(datasets.comb) <- c("", sample_name)
      
  ##### list genes present in the read count file
  gene_list <- as.vector(datasets.comb[,1])
      
  ##### Loop through the expression data from different datasets and merge them into one matrix
  for ( data_matrix in DatasetInput[ , "Expression_matrix" ] ) {
    
    ##### Add data from the reference datasets
    dataset <- as.data.frame( read.table(data_matrix, header=TRUE, sep="\t", row.names=NULL) )
      
    ##### list genes present in individal files
    gene_list <- c( gene_list, as.vector(dataset[,1]) )
    
    ##### Merge the expression datasets and make sure that the genes order is the same
    datasets.comb <- merge( datasets.comb, dataset, by=1, all = FALSE, sort= TRUE)
      
    ##### Remove per-sample data for merged samples to free some memory
    rm(dataset)
  }
  
  ##### Use gene IDs as rownames
  rownames(datasets.comb) <- datasets.comb[,1]
  datasets.comb <- datasets.comb[, -1]
  
  ##### Make syntactically valid names
  colnames(datasets.comb) <- make.names(colnames(datasets.comb))
  
  ##### Make sure that the target file contains info only about samples present in the data matrix
  targetFile <- targetFile[ rownames(targetFile) %in% colnames(datasets.comb),  ]
  
  ##### Make sure that the samples order in the data matrix is the same as in the target file 
  datasets.comb <- datasets.comb[ , rownames(targetFile) ]
  
  ##### Identify genes that were not present across all per-sampel files and were ommited in the merged matrix
  gene_list <- unique(gene_list)
  gene_list.missing <- gene_list[ gene_list %!in% rownames(datasets.comb) ]
  
  ##### Write list of missing genes into a file
  if ( length(gene_list.missing) > 0 ) {
    write.table(prepare2write(gene_list.missing), file = paste0(params$report_dir, "/", sample_name,".missing_genes.txt"), sep="\t", quote=FALSE, row.names=TRUE, append = FALSE )
  }
  
    return( list(datasets.comb, targetFile) )
}


##### Assign colours to different groups
getTargetsColours <- function(targets) {
  
##### Predefined selection of colours for groups
targets.colours <- c("red","cornflowerblue","green","darkred","darkgoldenrod","deepskyblue", "coral", "blue", "chartreuse4", "bisque4", "chocolate3", "cadetblue3", "darkslategrey", "lightgoldenrod4", "mediumpurple4", "orangered3","indianred1","blueviolet","darkolivegreen4","darkgoldenrod4","firebrick3","deepskyblue4", "coral3", "dodgerblue1", "chartreuse3", "bisque3", "chocolate4", "cadetblue", "darkslategray4", "lightgoldenrod3", "mediumpurple3", "orangered1")
  
  f.targets <- factor(targets)
  vec.targets <- targets.colours[1:length(levels(f.targets))]
  targets.colour <- rep(0,length(f.targets))
  for(i in 1:length(f.targets))
    targets.colour[i] <- vec.targets[ f.targets[i]==levels(f.targets)]
  
  return( list(vec.targets, targets.colour) )
}

##### Assign colours to different datasets
getDatasetsColours <- function(datasets) {
  
  ##### Predefined selection of colours for datasets
  datasets.colours <- c("dodgerblue","firebrick","lightslategrey","darkseagreen","orange","darkcyan","bisque", "coral2", "cadetblue3","red","blue","green")
  
  f.datasets <- factor(datasets)
  vec.datasets <- datasets.colours[1:length(levels(f.datasets))]
  datasets.colour <- rep(0,length(f.datasets))
  for(i in 1:length(f.datasets))
    datasets.colour[i] <- vec.datasets[ f.datasets[i]==levels(f.datasets)]
  
  return( list(vec.datasets, datasets.colour) )
}

##### Perform PCA. This function outputs a list with dataframe and samples colouring info ready for plotting
pca <- function(data, targets) {

  ##### Keep only genes with variance > 0 across all samples
  rsd <- apply(data,1,sd)
  data.subset <- data[rsd>0,]
  
  ##### Perform PCA
  data.subset_pca <- prcomp(t(data.subset), scale=FALSE)
  
  ##### Get variance importance for all principal components
  importance_pca <- summary(data.subset_pca)$importance[2,]
  importance_pca <- paste(round(100*importance_pca, 2), "%", sep="")
  names(importance_pca) <- names(summary(data.subset_pca)$importance[2,])
    
  ##### Prepare data frame
  data.subset_pca.df <- data.frame(targets$Target, targets$Dataset, data.subset_pca$x[,"PC1"], data.subset_pca$x[,"PC2"], data.subset_pca$x[,"PC3"])
  colnames(data.subset_pca.df) <- c("Target", "Dataset", "PC1", "PC2", "PC3")
  
  ##### Assigne colours to targets and datasets
  targets.colour <- getTargetsColours(target$Target)
  datasets.colour <- getDatasetsColours(target$Dataset)
  
  ##### Create a list with dataframe and samples colouring info
  pca.list <- list(data.subset_pca.df, importance_pca, targets.colour, datasets.colour)
  names(pca.list) <- c("pca.df", "importance_pca", "targets", "datasets")
  
  return( pca.list )
}

##### Convert a vector of numbers into corresponding vector of their percentiles
perc.rank <- function(x) trunc(rank(x))*100/length(x)

##### Perform range standardization between 0 and 1 (for the cumulative sums)
standardization <- function(x) sort(x-min(x))/(max(x)-min(x))

##### Calculate mean, sd, quantiles and cumulative franctions for expression data from specific sample group
exprGroupStats <- function(data, targets, target) {
  
  ##### Subset data for defined biological group
  data.group <- data[, targets$Target %in% target ]
  
  ##### For groups with > 1 sample get the mean and standard deviation for each gene
  if ( !is.null(ncol(data.group)) )  {
    
    data.group.mean <- rowMeans(data.group)
    data.group.mean <- sort(data.group.mean)
    data.group.sd <- rowSds(data.group)
    
  } else {
    data.group.mean <- sort(data.group)
    data.group.sd <- rep( NA, length(data.group))
  }
  
  ##### Make sure the mean and sd vectors have the same gene order
  names(data.group.sd) <- rownames(data)
  data.group.sd <- data.group.sd[names(data.group.mean)]

  ##### Convert a expression values into corresponding percentiles
  data.group.q <- perc.rank(data.group.mean)
  
  ##### Perform range standardization between 0 and 1 (for the cumulative sums), otherwise the negative values are summed up
  data.group.s <- standardization(data.group.mean)
  
  ##### Calculate cumulative sums and perform range standardization between 0 and 1 
  data.group.cum <- standardization(cumsum(data.group.s))
  
  ##### Perform Z-score transformation of the mean expression values
  data.group.z <- scale(data.group.mean)
  
  ##### Organise the data into data frame
  data.group.df <- as.data.frame(cbind( data.group.mean, data.group.sd, data.group.z, data.group.q, data.group.cum))
  names(data.group.df) <- c("mean", "sd", "z", "quantile", "cumulative_fraction")
  
  return( data.group.df )
}

##### Generate cumulative distribution function (CDF) plot for selected gene. If option "addBoxPlot" = TRUE, then generate additional boxplot below to show the data variance for selected gene in individual groups
cdfPlot <- function(gene, data, targets, sampleName, normal, cancer, addBoxPlot = FALSE, plot_mode = "static") {
  
  ##### Get expression-related stats for each group
  sample.expr.cum <- exprGroupStats(data, targets, sampleName)
  cancer.expr.cum <- exprGroupStats(data, targets, cancer)
  normal.expr.cum <- exprGroupStats(data, targets, normal)
  
  ##### Extract expression for selected genes
  sample.expr.cum.selected <- sample.expr.cum[ rownames(sample.expr.cum) %in% gene, ]
  cancer.expr.cum.selected <- cancer.expr.cum[ rownames(cancer.expr.cum) %in% gene, ]
  normal.expr.cum.selected <- normal.expr.cum[ rownames(normal.expr.cum) %in% gene, ]
  
  ##### Generate box-plot for selected gene
  if ( addBoxPlot ) {
    
    data.z <- scale(data)
    targets$Target[ targets$Target==sampleName ] <- "Patient"
    
    gene.expr.df <- data.frame(targets$Target, data.z[gene, ])
    colnames(gene.expr.df) <- c("Group", "Expression")
    
    ##### Reorder groups
    gene.expr.df$Group <- factor(gene.expr.df$Group, levels=c(normal, cancer, "Patient"))
    
    p2 <- plot_ly(gene.expr.df, x= ~Expression, color = ~Group, type = 'box', jitter = 0.3, pointpos = 0, boxpoints = 'all', colors = c("darkgreen", "red", "black"), opacity = 0.5, orientation = 'h', width = 800, height = 600, showlegend=FALSE)
  }
  
  ##### Generate interactive CFD plot with plotly
  p1 <- plot_ly(sample.expr.cum, x = ~z, color = I("black")) %>%
  
    ##### Add sample data
    add_markers(y = sample.expr.cum.selected$cumulative_fraction, x = sample.expr.cum.selected$z,
                text = rownames(sample.expr.cum.selected ),
                name = "Patient",
                marker = list(size = 12, color = "black"),
                showlegend = TRUE) %>%
  
    add_lines(y = sample.expr.cum$cumulative_fraction, x = sample.expr.cum$z, 
              line = list(color = "grey"),
              text = rownames( sample.expr.cum ),
              name = "Patient", showlegend = FALSE) %>%
    
    ##### Add cancer data
    add_markers(y = cancer.expr.cum.selected$cumulative_fraction, x =  cancer.expr.cum.selected$z,
                text = rownames( cancer.expr.cum.selected),
                name = cancer,
                marker = list(size = 12, opacity = 0.5, color = "red"),
                showlegend = TRUE) %>%
  
    add_lines(y = cancer.expr.cum$cumulative_fraction, x = cancer.expr.cum$z, opacity = 0.5,
              line = list(color = "red", dash = "dash"),
              text = rownames( cancer.expr.cum ),
              name = cancer, showlegend = FALSE) %>%
    
    ##### Add normal data
    add_markers(y = normal.expr.cum.selected$cumulative_fraction, x =  normal.expr.cum.selected$z,
                text = rownames( normal.expr.cum.selected ),
                name = normal,
                marker = list(size = 12, opacity = 0.5, color = "green"),
                showlegend = TRUE) %>%
  
    add_lines(y = normal.expr.cum$cumulative_fraction, x = normal.expr.cum$z, opacity = 0.5,
              line = list(color = "green", dash = "dash"),
              text = rownames( normal.expr.cum ),
              name = normal, showlegend = FALSE) %>%
    
    ##### Add quantile lines
    add_lines(y = seq(0,1,0.1), x = rep(quantile(sample.expr.cum$z)[2], 11), opacity = 0.5,
              line = list(color = "gray", dash = "dash"),
              name = "Q1", showlegend = FALSE) %>%
    
    add_lines(y = seq(0,1,0.1), x = rep(quantile(sample.expr.cum$z)[3], 11), opacity = 0.5,
              line = list(color = "gray", dash = "dash"),
              name = "Q2", showlegend = FALSE) %>%
    
    add_lines(y = seq(0,1,0.1), x = rep(quantile(sample.expr.cum$z)[4], 11), opacity = 0.5,
              line = list(color = "gray", dash = "dash"),
              name = "Q3", showlegend = FALSE) %>% 
    
        layout(title = gene, xaxis = list(title = "mRNA expression (Z-score)", zeroline = FALSE, range = c(min(sample.expr.cum$z)-1.5, max(sample.expr.cum$z)+1.5)),
           yaxis = list(title = "Cumulative fraction"),
           legend = list(orientation = 'v', x = 0.02, y = 1, bgcolor = "white")
           
           ##### Annotate the gene of interest
           #annotations = list(x = sample.expr.cum.selected$z, y = sample.expr.cum.selected$cumulative_fraction,
           #text = rownames(sample.expr.cum.selected), xref = "x", yref = "y", showarrow = TRUE, arrowhead = 0, opacity = 0.5, ax = 30, ay = 30)
    )
  
  ##### Combine CDF plot with boxplot if this option is selected
  if ( addBoxPlot ) {
    
    p1_2 <- subplot(p1, p2, nrows = 2, shareX = TRUE, shareY = FALSE, titleY = TRUE) %>%
  layout(xaxis = list(title = "mRNA expression (Z-score)", zeroline = FALSE, range = c(min(sample.expr.cum$z)-1.5, max(sample.expr.cum$z)+1.5)),
          yaxis = list(title = "Cumulative fraction"),
          legend = list(orientation = 'v', x = 0.02, y = 1, bgcolor = "white"),
          yaxis2 = list( title =""), xaxis2 = list(title = paste0(gene, " mRNA expression (Z-score)")), margin = list(l=50, r=50, b=50, t=50, pad=4), autosize = FALSE,
         showlegend=TRUE, showlegend2=FALSE)
    
    ##### Embed static rather interactive plots into the html report if requested by user. This will reduce the report size. TO this end the orca() function in plotly is used. Of note, this requires orca (https://github.com/plotly/orca) installation (conda option worked well for me, https://github.com/plotly/orca#method-1-conda), but the orca needs to be in the PATH, see https://github.com/plotly/orca/pull/122). 
    if ( plot_mode == "static" ) {

      ##### Create directory for CDF plots
      cdfPlotsDir <- paste(params$report_dir, "CDF_plots", sep = "/")
      if ( !file.exists(cdfPlotsDir) ) {

        dir.create(cdfPlotsDir, recursive=TRUE)
      }
  
      ##### Add access token, required by orca function, to the shell environment
      Sys.setenv('MAPBOX_TOKEN' = 'secret token')
  
      ##### Change directory to folder with CDF plots
      setwd(cdfPlotsDir)
  
      ##### Save the static image into a file
      orca(p1_2, format = "png", file = paste0(gene, "_cdf_plot.png"), width = 800, height = 600)
  
      ##### Present the static plot in the report
      include_graphics(paste(cdfPlotsDir, paste0(gene, "_cdf_plot.png"), sep = "/"), dpi = 72)
  
    } else if ( plot_mode == "interactive" ) {

      return( p1_2 )
    }
    
  } else {
   
    if ( plot_mode == "static") {

      ##### Create directory for CDF plots
      cdfPlotsDir <- paste(params$report_dir, "CDF_plots", sep = "/")
      if ( !file.exists(cdfPlotsDir) ) {

        dir.create(cdfPlotsDir, recursive=TRUE)
      }
  
      ##### Add access token, required by orca function, to the shell environment
      Sys.setenv('MAPBOX_TOKEN' = 'secret token')
  
      ##### Change directory to folder with CDF plots
      setwd(cdfPlotsDir)
  
      ##### Save the static image into a file
      orca(p1, format = "png", file = paste0(gene, "_cdf_plot.png"), width = 800, height = 200)
  
      ##### Present the static plot in the report
      include_graphics(paste(cdfPlotsDir, paste0(gene, "_cdf_plot.png"), sep = "/"), dpi = 72)
  
    } else if ( plot_mode == "interactive" ) {

      return( p1 )
    }
  }
}

##### Generate scatterplot with per-gene expression values (y-axis), CN values (x-axis) and mutation status info (colours), if provided
mutCNexprPlot <- function(data, mut_data = FALSE, plot_mode = "static") {
  
  ##### Generate scatterplot with per-gene expression values (y-axis), CN values (x-axis) and mutation status info (colours)
  if ( mut_data ) {
  
    p <- plot_ly(data, x = ~CN, y = ~mRNA, color = ~Mutation, text=~Gene, type='scatter', mode = "markers", marker = list(size=10, symbol="circle"), width = 800, height = 600) %>%
      
      add_annotations( text="Variant consequence", xref="paper", yref="paper",
                      x=1.02, xanchor="left",
                      y=0.85, yanchor="top",
                      legendtitle=TRUE, showarrow=FALSE ) %>%
      
      add_annotations( data = data[ data$CN > 10 | data$CN < 0.5 ,], text=~Gene,
                      x=~CN, xanchor="left",
                      y=~mRNA, yanchor="top",
                      font = list(color = "Grey",
                                size = 10),
                      legendtitle=TRUE, showarrow=FALSE ) %>%
      
      layout( xaxis = list(title = "Copy-number values"), yaxis = list(title = "mRNA expression (Z-score)"), margin = list(l=50, r=50, b=50, t=50, pad=4), autosize = F, legend = list( orientation = 'v', y=0.8, yanchor="top"), showlegend=TRUE)
  
  ##### Generate scatterplot with per-gene expression values (y-axis) and CN values (x-axis)
  } else {
  
    p <- plot_ly(data, x = ~CN, y = ~mRNA, text=~Gene, type='scatter', mode = "markers", marker = list(size=10, symbol="circle"), width = 800, height = 600) %>%
      
      add_annotations( data = data[ data$CN > 10 | data$CN < 0.5 ,], text=~Gene,
                      x=~CN, xanchor="left",
                      y=~mRNA, yanchor="top",
                      font = list(color = "Grey",
                                size = 10),
                      legendtitle=TRUE, showarrow=FALSE ) %>%
      
      layout( xaxis = list(title = "Copy-number values"), yaxis = list(title = "mRNA expression (Z-score)"), margin = list(l=50, r=50, b=50, t=50, pad=4), autosize = F, legend = list( orientation = 'v', y=0.8, yanchor="top"), showlegend=FALSE)
  }

  ##### Embed static rather interactive plots into the html report if requested by user. This will reduce the report size. TO this end the orca() function in plotly is used. Of note, this requires orca (https://github.com/plotly/orca) installation (conda option worked well for me, https://github.com/plotly/orca#method-1-conda), but the orca needs to be in the PATH, see https://github.com/plotly/orca/pull/122). 
  if ( plot_mode == "static" ) {

    ##### Create directory for the plots
    mutCNexprPlotDir <- paste(params$report_dir, "mut_cn_expr_plot", sep = "/")
    if ( !file.exists(mutCNexprPlotDir) ) {

      dir.create(mutCNexprPlotDir, recursive=TRUE)
    }

    ##### Add access token, required by orca function, to the shell environment
    Sys.setenv('MAPBOX_TOKEN' = 'secret token')

    ##### Change directory to folder with CDF plots
    setwd(mutCNexprPlotDir)

    ##### Save the static image into a file
    orca(p, format = "png", file = "mut_cn_expr_plot.png", width = 800, height = 600)

    ##### Present the static plot in the report
    include_graphics(paste(mutCNexprPlotDir, "mut_cn_expr_plot.png", sep = "/"), dpi = 72)
  
  } else if ( plot_mode == "interactive" ) {

    return( p )
  }
}

##### Fusion visualisation 
fusion_png <- function(geneA, geneB, clinker_results ) {

  ##### Get path to fusion visualisation  pdf file
  fusion_pdf <- paste(clinker_results, paste0(geneA, "_", geneB, ".pdf"), sep = "/")
  
  ##### Export pdf to png
  fusion_png <- paste(clinker_results, paste0(geneA, "_", geneB, ".png"), sep = "/")
  fusion <- image_read_pdf(fusion_pdf, pages = NULL, density = 300)
  image_write(fusion, path = fusion_png, format = "png")
  
  ##### Present the converted file in the report
  include_graphics(fusion_png)
}


##### Generate table with coloured cells indicating expression values for selected genes
exprTable <- function(genes, data, cn_data = NULL, cn_decrease = TRUE, targets, sampleName, normal, cancer, genes_annot = NULL, OncoKB_genes = NULL, mut_annot = NULL, type = "z") {
  
  ##### Check which of the selected genes are not present in the expression data
  genes.absent <- genes[ genes %!in% rownames(data) ]
  
  targets.list <- unique(targets$Target)
  
  ##### Initiate dataframe for expression mean values in each group
  group.z <- as.data.frame(matrix(NA, ncol = length(targets.list), nrow = nrow(data)))
  colnames(group.z) <- targets.list
  rownames(group.z) <- sort(rownames(data))
  
  for ( group in targets.list ) {
    
    ##### Calculate z-score for each group  
    group.stats <- exprGroupStats(data[rownames(group.z), ], targets, group)
    group.stats <- group.stats[order(rownames(group.stats)), ]
    
    #### Present expression data as percentiles or z-score values (default)
    if ( type == "perc" ) {
      
      group.z[, group] <- round(group.stats$quantile, digits=1)

    } else {
      group.z[, group] <- round(group.stats$z, digits=2)
    }
  } 
  
  ##### Compute Z-scores sd for each gene across groups
  group.z <- cbind(group.z, round(rowSds(as.matrix(group.z)), digits = 2))
  names(group.z)[ncol(group.z)] <- "SD"
  
  ##### Calculate Z-score differneces between investigated sample and normal group mean values
  group.z <- cbind(group.z, round((group.z[, sampleName] - group.z[, normal]), digits = 2))
  names(group.z)[ncol(group.z)] <- "Patient vs Normal"
  
  ##### Calculate Z-score differneces between investigated sample and cancer group mean values
  group.z <- cbind(group.z, round((group.z[, sampleName] - group.z[, cancer]), digits = 2))
  names(group.z)[ncol(group.z)] <- paste0("Patient vs ", cancer)
  
  ##### Add NAs for genes that are absent in the expression matrix. In the "Patient vs Normal" and "Patient vs PDAC" columns provide "0"s to facilitate interactive sorting the table. These will appear in blank cells in the table
  if ( length(genes.absent) > 0 ) {
    
    NAs.df <- data.frame(matrix(NA, ncol = ncol(group.z), nrow = length(genes.absent)))
    names(NAs.df) <- names(group.z)
    rownames(NAs.df) <- genes.absent
    NAs.df[ names(NAs.df) %in% c("Patient vs Normal", "Patient vs PDAC")] <- 0
    
    group.z <- rbind( group.z,  NAs.df)
  }
  
  ##### Change sample ID to "Patient" and normal sample to "Normal" for better visualisation
  names(group.z)[names(group.z)==sampleName] <- "Patient"
  targets.list[targets.list==sampleName] <- "Patient"
  names(group.z)[names(group.z)==normal] <- "Normal"
  targets.list[targets.list==normal] <- "Normal"
  
  ##### Reorder groups
  group.z <- cbind(group.z[ , c("Normal", cancer, "Patient")], group.z[, c("SD", "Patient vs Normal", "Patient vs PDAC" )])
    
  ##### Add genes annotation
  if ( !is.null(genes_annot) ) {
    
    ##### Remove rows with duplicated gene symbols
    if ( "SYMBOL" %in% names(genes_annot) ) {
      
      genes_annot <- genes_annot[!duplicated(genes_annot$SYMBOL),]  
      rownames(genes_annot) <- genes_annot$SYMBOL
      genes_annot <- genes_annot[ , -c(match("SYMBOL", names(genes_annot))), drop=FALSE]
    }
    
    ##### Merge the dataframe with groups mean expression values and gene annotations
    group.z <- merge(genes_annot, group.z, by="row.names", all = TRUE, sort = FALSE)
    rownames(group.z) <- group.z$Row.names
    names(group.z) <- gsub("Row.names", "Gene", names(group.z))
    
    ##### Include only queried genes
    group.z <- group.z[ genes, ]
    
  } else {
    group.z <- cbind(rownames(group.z), group.z)
  }
  
  ##### Add cancer gene resources info
  if ( !is.null(OncoKB_genes) ) {
    
    group.z <- merge(group.z, OncoKB_genes, by="row.names", all = TRUE, sort = FALSE)
    rownames(group.z) <- group.z$Row.names
    group.z <- group.z[ , -c(match("Row.names", names(group.z))), drop=FALSE]
    
    ##### Provide link to OncoKB for reported cancer genes
    for ( gene in genes ) {
      
      if ( gene %in% rownames(OncoKB_genes) & OncoKB_genes[gene, "OncoKB"] == "Yes" ) {
        
        group.z$Gene[ group.z$Gene==gene] <- paste0("<a href='http://oncokb.org/#/gene/", gene, "' target='_blank'>", gene, "</a>")
      }
    }
  }
      
  ##### Order table by the highest absolute values for Patient vs Normal difference
  group.z <- group.z[ order(abs(group.z[, "Patient vs Normal"]),  decreasing = TRUE), ]
  
  ##### Define colours for cells background for each group and the patient vs normal difference
  ##### Initiate dataframe for expression mean values in each group
  brks.q <- as.data.frame( matrix(NA, ncol = length(targets.list), nrow = length(seq(.05, .95, .0005)) ))
  colnames(brks.q) <- targets.list
  clrs.q <- as.data.frame( matrix(NA, ncol = length(targets.list), nrow = length(seq(.05, .95, .0005))+1 ))
  colnames(clrs.q) <- targets.list
  
  for ( group in c(targets.list, "Patient vs Normal", paste0("Patient vs ", cancer)) ) {

    brks.q[[group]] <- quantile(group.z[, group], probs = seq(.05, .95, .0005), na.rm = TRUE)
    #brks.q[[group]] <- sort(group.z[, group])
    
    clrs_pos.q <- round(seq(255, 150, length.out = length(brks.q[[group]])/2 + 1.5), 0) %>%
    {paste0("rgb(255,", ., ",", ., ")")}
    clrs_neg.q <- rev(round(seq(255, 150, length.out = length(brks.q[[group]])/2 - 0.5), 0)) %>%
    {paste0("rgb(", .,",", .,",", "255)")}
    
    clrs.q[[group]] <- c(clrs_neg.q, clrs_pos.q)
  }
  
  ##### Subset the expression data to include only the user-defined genes
  group.z <- group.z[ rownames(group.z) %in% genes, ]
  names(group.z)[1] <- "Gene"
    
  #### Add variants information to the expression table - if exists. Note, "TIER" and "CONSEQUENCE" columns are required
  if( !is.null(mut_annot) && "TIER" %in% colnames(mut_annot) ) {
    
    mut_annot <- mut_annot[mut_annot$SYMBOL %in% genes,]
    
    #### keep only varaints that has the lowest tier value. Multiple varaints detected in same gene but with higher tier will be added to additional column "CONSEQUENCE_OTHER". Applies to the ones that may have multiple mutations and hence tiers
    ##### First, create a list of genes to store multiple variants
    mut_consequence <- vector("list", length(unique(mut_annot$SYMBOL)))
    mut_consequence  <- setNames(mut_consequence,  unique(mut_annot$SYMBOL) )
    
    ##### Record all varaints detected in individual genes
    for ( i in 1:nrow(mut_annot) ) {
      
      mut_consequence[[ mut_annot$SYMBOL[i] ]] <- unique(c( mut_consequence[[ mut_annot$SYMBOL[i] ]], mut_annot$CONSEQUENCE[i] ))
    }
    
    ##### Remove the first elements since these variant consequences will be reported as the "canonical" CONSEQUENCE
    mut_consequence <- lapply(mut_consequence, function(x) x[-1])
    
    ##### Order variant entires based on tier info, to make sure that the varaints with the lowest tier are reported first
    mut_annot <- mut_annot[ order(mut_annot$TIER), ]
    
    ##### Remove rows with duplicated gene symbols
    mut_annot <- mut_annot[!duplicated(mut_annot$SYMBOL),]  
    rownames(mut_annot) <- mut_annot$SYMBOL
    
    ##### Add other provided variants consequences for individual genes
    mut_annot$CONSEQUENCE_OTHER <- "-"
    
    for ( gene in rownames(mut_annot) ) {
      
      if ( length(mut_consequence[[ gene ]]) > 0 ) {
        
        mut_annot$CONSEQUENCE_OTHER[ match(gene, mut_annot$SYMBOL)  ] <- mut_consequence[[ gene ]]
      }
    }
    
    #### merge the variants information with the dataframe
    group.z <- merge(group.z, mut_annot, by = "row.names", all = TRUE, sort = FALSE)
    group.z <- group.z[, colnames(group.z) %!in% "SYMBOL"]
    
    ##### Order the data by increasing TIER category (to allow filtering based on tier information) and then by the highest absolute values for Patient vs Normal difference (to allow filtering based on z-score differences)
    group.z <- group.z[ order(abs(group.z[, "Patient vs Normal"]),  decreasing = TRUE), ]
    group.z <- group.z[ order(group.z$TIER), ]
    
    rownames(group.z) <- group.z$Row.names
    group.z <- group.z[ , -c(match("Row.names", names(group.z))), drop=FALSE]
  }
  
  ##### Add CN data if provided
  if ( !is.null(cn_data) ) {
    
    ##### Get the position of "Patient vs Cancer" column
    col_idx <- grep(paste0("Patient vs ", cancer), names(group.z))
    
    ##### Now place the CN data after the "Patient vs Cancer" column
    group.z <- add_column(group.z, round(cn_data[ rownames(group.z), "CN"], digits=2), .after = col_idx)
    colnames(group.z)[ col_idx+1 ] <- "Patient (CN)"
    
    ##### Order the data by CN values (to allow filtering based on CN information) and then by the highest absolute values for Patient vs Normal difference (to allow filtering based on z-score differences)
    group.z <- group.z[ order(abs(group.z[, "Patient vs Normal"]),  decreasing = cn_decrease), ]
    group.z <- group.z[ order(group.z[ ,col_idx+1 ],  decreasing = cn_decrease), ]
  }
  
  if ( !is.null(cn_data) ) {
    
    ##### Generate a table with genes annotations and coloured expression values in each group
    dt.table <- DT::datatable( data = group.z, filter="none", rownames = FALSE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, caption = htmltools::tags$caption( style = 'caption-side: top; text-align: left; color:grey; font-size:100% ;'), escape = FALSE) %>%
      DT::formatStyle( columns = names(group.z), `font-size` = '12px', 'text-align' = 'center' ) %>%
      
      ##### Colour cells according to the expression values quantiles in each group
      DT::formatStyle(columns = targets.list[1], 
                      backgroundColor = DT::styleInterval(brks.q[[targets.list[1]]], clrs.q[[targets.list[1]]])) %>%
      DT::formatStyle(columns = targets.list[2], 
                      backgroundColor = DT::styleInterval(brks.q[[targets.list[2]]], clrs.q[[targets.list[2]]])) %>%
      DT::formatStyle(columns = targets.list[3], 
                      backgroundColor = DT::styleInterval(brks.q[[targets.list[3]]], clrs.q[[targets.list[3]]])) %>%
      DT::formatStyle(columns = "Patient vs Normal", 
                      backgroundColor = DT::styleInterval(brks.q[["Patient vs Normal"]], clrs.q[["Patient vs Normal"]])) %>%
      DT::formatStyle(columns = paste0("Patient vs ", cancer), 
                      backgroundColor = DT::styleInterval(brks.q[[paste0("Patient vs ", cancer)]], clrs.q[[paste0("Patient vs ", cancer)]])) %>%
      DT::formatStyle(columns = "Patient (CN)", background = DT::styleColorBar(base::range(group.z[ ,"Patient (CN)" ], na.rm = TRUE), 'lightblue'), backgroundSize = '98% 88%', backgroundRepeat = 'no-repeat', backgroundPosition = 'center')
    
  } else {
    
        ##### Generate a table with genes annotations and coloured expression values in each group
    dt.table <- DT::datatable( data = group.z, filter="none", rownames = FALSE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, caption = htmltools::tags$caption( style = 'caption-side: top; text-align: left; color:grey; font-size:100% ;'), escape = FALSE) %>%
      DT::formatStyle( columns = names(group.z), `font-size` = '12px', 'text-align' = 'center' ) %>%
      
      ##### Colour cells according to the expression values quantiles in each group
      DT::formatStyle(columns = targets.list[1], 
                      backgroundColor = DT::styleInterval(brks.q[[targets.list[1]]], clrs.q[[targets.list[1]]])) %>%
      DT::formatStyle(columns = targets.list[2], 
                      backgroundColor = DT::styleInterval(brks.q[[targets.list[2]]], clrs.q[[targets.list[2]]])) %>%
      DT::formatStyle(columns = targets.list[3], 
                      backgroundColor = DT::styleInterval(brks.q[[targets.list[3]]], clrs.q[[targets.list[3]]])) %>%
      DT::formatStyle(columns = "Patient vs Normal", 
                      backgroundColor = DT::styleInterval(brks.q[["Patient vs Normal"]], clrs.q[["Patient vs Normal"]])) %>%
      DT::formatStyle(columns = paste0("Patient vs ", cancer), 
                      backgroundColor = DT::styleInterval(brks.q[[paste0("Patient vs ", cancer)]], clrs.q[[paste0("Patient vs ", cancer)]]))
  }
    
  group.z$SYMBOL <- rownames(group.z)
    
  return( list(dt.table,  group.z) )
}

##### Generate table with drugs targeting selected set of genes using info from CIViC database (https://civicdb.org/)
civicDrugTable <- function(genes, civic_var_summaries, civic_clin_evid, evid_type = "Predictive") {
  
  ##### Initialize data frame to the about drug-target info from CIViC
  drug.info <- setNames(data.frame(matrix(ncol = 18, nrow = 0)), c("Gene", "Variant", "variant_types", "drugs", "nct_ids", "evidence_level", "evidence_type", "evidence_direction", "clinical_significance", "rating", "civic_actionability_score", "Disease", "phenotypes", "pubmed_id", "variant_origin", "representative_transcript", "representative_transcript2", "last_review_date"))
  
  evid_levels <- list("A" = "A: Validated association", "B" = "B: Clinical evidence", "C" = "C: Case study", "D" = "D: Preclinical evidence", "E" = "E: Inferential association")
  
  ##### Loop thourgh each gene and check if they are druggable
  for ( gene in genes) {
    
    ##### Get summary info about druggable genes
    if ( gene %in% civic_clin_evid$gene ) {
      
      ##### Extract info about all reported variants's clinical evidence for queried gene
      clin.evid.info <- civic_clin_evid[ civic_clin_evid$gene == gene , ]

      ##### Use more descriptive evidence level info
      for ( level in unique(clin.evid.info$evidence_level) ) {
        
        clin.evid.info$evidence_level[ clin.evid.info$evidence_level == level ] <- evid_levels[[ level ]]
      }
      
      evid_type.list <- c(clin.evid.info$evidence_type == evid_type)
      
      ##### Provide link to CIViC clinical evidence summary
      clin.evid.info$evidence_type <- paste0("<a href='", clin.evid.info$evidence_civic_url, "' target='_blank'>", clin.evid.info$evidence_type, "</a>")
      
      ##### Provide link to CIViC gene summary
      clin.evid.info$gene_civic_url <- paste0("<a href='", clin.evid.info$gene_civic_url, "' target='_blank'>", gene, "</a>")
      names(clin.evid.info)[ names(clin.evid.info) =="gene_civic_url" ] <- "Gene"
      
      ##### Provide link to CIViC variants summary
      clin.evid.info$variant_civic_url <- paste0("<a href='", clin.evid.info$variant_civic_url, "' target='_blank'>", clin.evid.info$variant, "</a>")
      names(clin.evid.info)[ names(clin.evid.info) =="variant_civic_url" ] <- "Variant"
      
      ##### Provide link to ClinicalTrials.gov variants summary based on NCT IDs
      for ( nct_id in clin.evid.info$nct_ids ) {
        
        if ( !is.empty(nct_id) ) {
          
          ##### Deal with multiple NCT IDs (separated by comma)
          nct_id_url <- gsub(" '" , "'", paste(gsub("/ " , "/", paste("<a href='https://clinicaltrials.gov/ct2/show/", unlist(strsplit(nct_id, split=",", fixed=TRUE)) , "' target='_blank'>", unlist(strsplit(nct_id, split=",", fixed=TRUE)), "</a>")), collapse = ", "))
          clin.evid.info$nct_ids[ clin.evid.info$nct_ids==nct_id ] <- nct_id_url
        }
      }
      
      ##### Provide link to PubMed variants summary
      clin.evid.info$pubmed_id <- paste0("<a href='https://www.ncbi.nlm.nih.gov/pubmed/", clin.evid.info$pubmed_id, "' target='_blank'>", clin.evid.info$pubmed_id, "</a>")
      
      ##### Provide link to Disease Ontology
      clin.evid.info$doid <- paste0("<a href='http://www.disease-ontology.org/?id=DOID:", clin.evid.info$doid, "' target='_blank'>", clin.evid.info$disease, "</a>")
      names(clin.evid.info)[ names(clin.evid.info) =="doid" ] <- "Disease"
      
      ##### Extract info about all variants it that gene
      var.info <- civic_var_summaries[ civic_var_summaries$gene == gene , ]
      var.info <- var.info[, c("variant", "variant_types", "civic_actionability_score")]
      var.info[,"variant_types"] <- gsub("_", " ", var.info[,"variant_types"])
      var.info[,"variant_types"] <- gsub(",", ", ", var.info[,"variant_types"])
      
      ##### Merge about all variants it that gene and clinical evidence info
      clin.evid.info <- merge(clin.evid.info, var.info, by = "variant", all.x = TRUE)
      
      ##### Subset table to include only most important info
      clin.evid.info <- clin.evid.info[ , names(drug.info)]
      
      ##### Subset table to include only variants with the evidence type of interest
      clin.evid.info <- clin.evid.info[ evid_type.list,  ]
      
      ##### Add drugs info for subsequent gene
      drug.info <- rbind(drug.info, clin.evid.info)
    }
  }
  
  ##### Use more friendly column names for the table
  names(drug.info) <- c("Gene", "Variant", "Variant type", "Drugs", "Clinical trials", "Evidence level", "Evidence type", "Evidence direction", "Clinical significance", "Trust rating", "Actionability score", "Disease", "Phenotypes", "PubMed ID",  "Variant origin", "Representative transcript", "Representative transcript 2", "Review date")
  
  ##### Generate a table
  dt.table <- DT::datatable( data = drug.info, filter = "none", rownames = FALSE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800, caption = htmltools::tags$caption(style = 'caption-side: top; text-align: left; color:grey; font-size:100% ;'), escape = FALSE) %>%
    DT::formatStyle( columns = names(drug.info), `font-size` = '12px', 'text-align' = 'center' ) %>%
    
    ##### Colour cells according to evidence level and trust rating
    DT::formatStyle(columns = "Evidence level", 
                    backgroundColor = DT::styleEqual(c("A: Validated association", "B: Clinical evidence", "C: Case study", "D: Preclinical evidence", "E: Inferential association"), c("mediumseagreen", "deepskyblue", "mediumpurple", "darkorange", "coral")) )  %>%
    #DT::formatStyle(columns = "Trust rating", 
    #                backgroundColor = DT::styleEqual(c(1:5), c(rev(round(seq(0, 200, length.out = 5), 0) %>%  {paste0("rgb(", .,",200,", ., ")")}))) )
    DT::formatStyle(columns = "Trust rating", 
                    backgroundColor = DT::styleEqual(c(1:5), c("coral", "azure", "lightskyblue", "palegreen", "mediumseagreen")) )
  
  return( list(dt.table,  drug.info) )
}

##### Generate bezier curves-like plot representing gene fusion events
fusionsBezierPlot <- function(fusion_annot, genes_annot) {
  
  ##### Get the genes chromosomes...
  chr1 <- paste0("chr", fusion_annot$SEQNAME)
  chr2 <- paste0("chr", fusion_annot$SEQNAME.1)
  
  ##### ...positions
  pos1 <- fusion_annot$GENESEQSTART
  pos2 <- fusion_annot$GENESEQSTART.1
  
  ##### ... and the events type
  type <- paste(fusion_annot$pizzly.fusions.annot.fusions_abundant, fusion_annot$pizzly.fusions.annot.fusions_cancer, sep = "-") 
  type <- gsub("Yes-Yes","Abundant and cancer gene(s)", type)
  type <- gsub("Yes--","Abundant transcript(s)", type)
  type <- gsub("--Yes","Cancer gene(s)", type)
  type <- gsub("---","Other", type)

  #### Prepare x-axis coordinates info for ggplot
  ##### This part of the script converts the genomic positions from hg38 to coordinates that can be plotted on the ggplot x-axis.
  ##### Start with calculating the whole genome length. Here we consider chromosomes 1-22, X and Y
  genome.length <- sum(seqlengths(Hsapiens)[1:24])
  
  ##### Now calculate fake chromosomes' start positions so that they match with the x-axis coordinates in the ggplot
  chrs_fake_starts <- vector("list", 24)
  chrs_fake_starts  <- setNames(chrs_fake_starts,  names(Hsapiens)[1:24] )
  
  ##### Chromosome 1 has coordingate 0
  chrs_fake_starts[["chr1"]] <- 0
  
  ##### The coordinates for the remaining chromosomes will be calculated by adding the lengths of individual preceding chromosomes
  length_sum <- 0
  for ( i in 2:length(chrs_fake_starts) ) {
    #cat(paste("\nThe fake start position for " , names(chrs_fake_starts)[i], " is ", length_sum + as.numeric(seqlengths(Hsapiens)[[i-1]]), sep=""))
  # cat(paste("\nLength of " , names(chrs_fake_starts)[i-1], " = ", as.numeric(seqlengths(Hsapiens)[[i-1]]), " and the sum of the preceding chromosomes = ", length_sum, ".\n\n", sep=""))
    length_sum <- length_sum + as.numeric(seqlengths(Hsapiens)[[i-1]])
    chrs_fake_starts[[names(Hsapiens)[i]]] <- length_sum
  }
  
  ##### Calculate the coordinates for x-axis labels (chr1, chr2...) for ggplot by adding the half-lenght of each chrosomome to its fake start
  chrs_fake_label.pos <- vector("list", 24)
  chrs_fake_label.pos  <- setNames(chrs_fake_label.pos,  names(Hsapiens)[1:24] )
  for ( i in 1:length(chrs_fake_starts) ) {
    chrs_fake_label.pos[[names(Hsapiens)[i]]] <- seqlengths(Hsapiens)[[i]]/2 + chrs_fake_starts[[names(Hsapiens)[i]]]
    # cat(paste("\nThe x-axis coordinate for " , names(chrs_fake_starts)[i], " label is ", chrs_fake_label.pos[[names(Hsapiens)[i]]], " = ",  seqlengths(Hsapiens)[[i]]/2, " (half-length) + ", chrs_fake_starts[[names(Hsapiens)[i]]]," (fake start)", sep=""))
  }
  
  #### Calculate ggplot x-axis coordinates for fusion events
  ##### Calculate the coordinates to draw bezier curves by adding the fusion events position info to the fake start coordinates of corresponding chromosomes
  pos1_fake <- vector("list", nrow(fusion_annot))
  pos2_fake <- vector("list", nrow(fusion_annot))
  
  for ( i in 1:nrow(fusion_annot) ) {
    # cat(paste("\nCalculations for fusion event: " , paste( chr1[i], pos1[i], sep=" " ), "-",  paste( chr2[i], pos2[i], sep=" " ), sep=""))
    # cat(paste("\nThe x-axis coordinate for position 1 is ", chrs_fake_starts[[chr1[i]]] + pos1[i], " = ",  chrs_fake_starts[[chr1[i]]], " (the fake start of ", chr1[i],") + ", pos1[i], " (the real position 1)", sep=""))
    # cat(paste("\nThe x-axis coordinate for position 2 is ", chrs_fake_starts[[chr2[i]]] + pos2[i], " = ",  chrs_fake_starts[[chr2[i]]], " (the fake start of ", chr2[i],") + ", pos2[i], " (the real position 2).\n", sep=""))
    pos1_fake[[i]] <- chrs_fake_starts[[chr1[i]]] + pos1[i]
    pos2_fake[[i]] <- chrs_fake_starts[[chr2[i]]] + pos2[i]
  }
  
  ##### Get random number for the bezier curves' heigths and caluclate the middle point for each bezier curve
  beziers.height <- runif(nrow(fusion_annot), 1, 2)
  beziers.mid <- unlist(pos1_fake)+(unlist(pos2_fake)-unlist(pos1_fake))/2
  
  ##### Create data-frame with beziers curves info
  beziers <- data.frame(
      x = c(rbind( unlist(pos1_fake), beziers.mid, unlist(pos2_fake) )),
      y = c(rbind( 0.2, beziers.height, 0.2 ) ),
      type = rep( paste( chr1, make.names(pos1, unique=TRUE), chr2, make.names(pos2, unique=TRUE), sep="_" ), each=3),
        group = rep( type, each=3)
  )
  
  ##### Generate a bezier curves-like plot representing fusion events
  p <- ggplot() + geom_bezier(aes(x= x, y = y, group = type, color = group ), data = beziers, show.legend = TRUE, size = 0.2) +
        ##### Remove default axes labels and grey backgroud
        theme(axis.title.x=element_blank(), axis.text.x= element_blank(), axis.ticks.x=element_blank(), axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank(),
        ##### ...and the grey backgroud
                    panel.background = element_rect(fill = NA),
        ##### ...change the legend parameters
                    legend.title=element_text(size=6), legend.text=element_text(size=10), legend.key.size = unit(1,"line"), legend.key= element_blank(), legend.position = c(0.85,0.7) ) +
        ##### Set the axes limits
        scale_x_continuous(limits = c(1, genome.length)) +
        scale_y_continuous(limits = c(0, 2)) +
        ##### Add chromosomes boundaries
        geom_segment(aes(x = c(1,unlist(chrs_fake_starts)[2:24],genome.length) , xend = c(1,unlist(chrs_fake_starts)[2:24],genome.length), y = 0, yend = 0.2), colour = 'grey', size = 0.2) +
        labs( color = "") +
        ##### Add chromosomes labels
        annotate(geom = 'text', label = names(chrs_fake_label.pos), x = unlist(chrs_fake_label.pos), y = 0.1, size = 2, angle = 45)
      ##### Add gene fusion labels
      #annotate(geom = 'text', label = paste(fusion_annot$fusion_data.geneA.name, fusion_annot$fusion_data.geneB.name, sep="-"), x = beziers.mid, y = beziers.height-0.5, size = 1.5)
  
  return( p )
}
##### Load libraries
suppressMessages(library(edgeR))
suppressMessages(library(preprocessCore))
suppressMessages(library(rapportools))
suppressMessages(library(edgeR))
suppressMessages(library(kableExtra))
suppressMessages(library(tidyverse))
suppressMessages(library(knitr))
suppressMessages(library(magick))
suppressMessages(library(matrixStats))
suppressMessages(library(ggplot2))
suppressMessages(library(ggforce))
suppressMessages(library(NOISeq))
suppressMessages(library(package=paste0("EnsDb.Hsapiens.v", params$ensembl_version), character.only = TRUE))
suppressMessages(library(package=paste0("BSgenome.Hsapiens.UCSC.hg", params$ucsc_genome_assembly), character.only = TRUE)) # required to get chromosomes lengts for fusionsBezierPlot function generating Bezier plot to present gene fusions in the genomics context
##### Load reference datasets
##### Define the reference datasets based on user-defined tissue of sample origin
ref_tissue <- list( "pancreas" = c(paste(params$ref_data_dir, "Datasets_list_PDAC.txt", sep="/"), "Normal (pancreas)", "PDAC"), "cervix" = c(paste(params$ref_data_dir, "Datasets_list_cervical_SCC.txt", sep="/"), "Normal (cervix uteri)", "Cervical squamous cell carcinoma") )

tissue <- params$tissue
normal_group <- ref_tissue[[tissue]][2]
cancer_group <- ref_tissue[[tissue]][3]

##### Create a list with reference datasets
ref_datasets <- c(tissue)
ref_datasets.list <- vector("list", length(ref_datasets))
names(ref_datasets.list) <- ref_datasets

##### Create a list with various sets of genes
ref_genes <- c("genes_cancer", "oncokb_genes", "genes_immune")
ref_genes.list <- vector("list", length(ref_genes))
names(ref_genes.list) <- ref_genes

##### Create a list with cancer genes annotations
caner_genes_annot <- c("oncokb_clin_vars", "oncokb_all_vars")
caner_genes_annot.list <- vector("list", length(caner_genes_annot))
names(caner_genes_annot.list) <- caner_genes_annot

##### Get patient data dir and sample file name
dataDir <- unlist(strsplit(params$count_file, split='/', fixed=TRUE))
sampleName.orig <- gsub("-ready.counts", "", dataDir[length(dataDir)])
dataDir <- paste(dataDir[-c(length(dataDir))], collapse ="/")

##### Read in reference datasets and merge them with sample data. This part outputs a vector with first element containing the merged data and second element containing merged targets info
ref_datasets.list[[tissue]] <- combineDatasets(params$sample_name, params$count_file, ref_tissue[[tissue]][1])
names(ref_datasets.list[[tissue]]) <- c("combined_data", "sample_annot")

##### Read in selected genes list
ref_genes.list[["genes_cancer"]] <- read.table(paste(params$ref_data_dir, params$genes_cancer, sep="/"), sep="\t", as.is=TRUE, header=FALSE, row.names=NULL, quote="")[ ,1 ]
ref_genes.list[["oncokb_genes"]] <- read.table(paste(params$ref_data_dir, params$oncokb_genes, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="", comment.char = "")
ref_genes.list[["genes_immune"]] <- read.table(paste(params$ref_data_dir, params$genes_immune, sep="/"), sep="\t", as.is=TRUE, header=FALSE, row.names=NULL, quote="")

##### Read in mutation data for investigate sample
##### Check if PCGR (mutation) output file exists
if ( file.exists(paste(dataDir, "pcgr/pcgr-somatic.csv", sep = "/")) ) {
  
  ref_genes.list[["pcgr"]] <- read.table(paste(dataDir, "pcgr/pcgr-somatic.csv", sep = "/"), sep=",", as.is=TRUE, header=TRUE, row.names=NULL, fill = TRUE)
  
  ##### Simplify the variants types
  ref_genes.list[["pcgr"]]$CONSEQUENCE <- gsub("_variant", "", ref_genes.list[["pcgr"]]$CONSEQUENCE)
  ref_genes.list[["pcgr"]]$CONSEQUENCE <- gsub("_", " ", ref_genes.list[["pcgr"]]$CONSEQUENCE)
  
  ##### Simplify tiers' annotations and AFs
  ref_genes.list[["pcgr"]]$TIER <- gsub("TIER ", "", ref_genes.list[["pcgr"]]$TIER)
  
  ref_genes.list[["pcgr"]]$AF_TUMOR <- round(ref_genes.list[["pcgr"]]$AF_TUMOR, digits = 2)
  ref_genes.list[["pcgr"]]$AF_NORMAL <- round(ref_genes.list[["pcgr"]]$AF_NORMAL, digits = 2)
  
} else {
  ref_genes.list[["pcgr"]] <- NULL
}

##### Check if purple (CN) output file exists
if ( file.exists(paste(dataDir, "purple/purple.gene.cnv", sep = "/")) ) {
  
  ref_genes.list[["purple"]] <- read.table(paste(dataDir, "purple/purple.gene.cnv", sep = "/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, fill = TRUE)
  
} else {
  ref_genes.list[["purple"]] <- NULL
}

##### Read in OncoKB (http://oncokb.org) annotations
caner_genes_annot.list[["oncokb_clin_vars"]] <- read.table(paste(params$ref_data_dir, params$oncokb_clin_vars, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="")
caner_genes_annot.list[["oncokb_all_vars"]] <- read.table(paste(params$ref_data_dir, params$oncokb_all_vars, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="", fill = TRUE)

##### Read in CIViC (https://civicdb.org/) annotations
caner_genes_annot.list[["civic_var_summaries"]] <- read.table(paste(params$ref_data_dir, params$civic_var_summaries, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="", fill = TRUE)
caner_genes_annot.list[["civic_clin_evid"]] <- read.table(paste(params$ref_data_dir, params$civic_clin_evid, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="", fill = TRUE)

##### Read in Cancer Biomarkers database (https://www.cancergenomeinterpreter.org/biomarkers) annotations
caner_genes_annot.list[["cancer_biomarkers_trans"]] <- read.table(paste(params$ref_data_dir, params$cancer_biomarkers_trans, sep="/"), sep="\t", as.is=TRUE, header=TRUE, row.names=NULL, quote="", fill = TRUE)
##### Data transformation and filtering
##### For differential expression and related analyses, gene expression is rarely considered at the level of raw counts since libraries sequenced at a greater depth will result in higher counts. Rather, it is common practice to transform raw counts onto a scale that accounts for such library size differences. Here we convert the read count data into log2-counts per million (***log-CPM***) using function from *[edgeR](https://bioconductor.org/packages/release/bioc/html/edgeR.html){target="_blank"}* package. Genes with very low counts across all libraries provide little evidence for differential expression. In the biological point of view, a gene must be expressed at some minimal level before it is likely to be translated into a protein or to be biologically important. In addition, the pronounced discretenes of these counts interferes with some of the statistical approximations that are used later in the pipeline. These genes should be filtered out prior to further analysis.

##### Loop through combined datasets
for ( tissue in names(ref_datasets.list) ) {
  
  counts <- ref_datasets.list[[tissue]][["combined_data"]]
  target <- ref_datasets.list[[tissue]][["sample_annot"]]
  
  ##### Create EdgeR DGEList object
  y <- DGEList(counts=counts,  group=target$Target)
  
  ##### Add datasets name for each sample
  y$samples$dataset <- target$Dataset
  
  ##### Filtering to remove low expressed genes. Users should filter with CPM rather than filtering on the counts directly, as the latter does not account for differences in library sizes between samples. Here we keep only genes that have CPM of 1
  keep <- rowSums(cpm(y)>1) >= ncol(counts)/10
  y.filtered <- y[keep, , keep.lib.sizes=FALSE]
  
  ref_datasets.list[[tissue]][["combined_data_processed"]] <- y.filtered
}

# cat("The CPM of 1 (cut-off for removing low expressed genes) corresponds to", round(min(as.numeric(colSums(counts)*1e-6)), digits=0), "reads in sample with the lowest sequencing depth, and", round(max(as.numeric(colSums(counts)*1e-6)), digits=0), "reads in sample with the greatest sequencing depth\n")
##### Data normalisation
##### During the sample preparation or sequencing process, external factors that are not of biological interest can affect the expression of individual samples. For example, samples processed in the first batch of an experiment can have higher expression overall when compared to samples processed in a second batch. It is assumed that all samples should have a similar range and distribution of expression values. Normalisation for sample-specific effectss is required to ensure that the expression distributions of each sample are similar across the entire experiment. Normalisation by the method of *[trimmed mean of M-values](https://www.ncbi.nlm.nih.gov/pubmed/20196867){target="_blank"} (TMM)* is performed using the *calcNormFactors* function in *[edgeR](https://bioconductor.org/packages/release/bioc/html/edgeR.html){target="_blank"}*. The normalisation factors calculated here are used as a scaling factor for the library sizes. TMM is the recommended for most RNA-Seq data where the majority (more than half) of the genes are believed not differentially expressed between any pair of the samples.

##### Adjust for RNA composition effect. Calculate scaling factors for the library sizes with calcNormFactors function using trimmed mean of M-values (TMM) between each pair of samples. Note, that the raw read counts are used to calculate the normalisation factors

##### Loop through combined datasets
for ( tissue in names(ref_datasets.list) ) {
  
  y.filtered <- ref_datasets.list[[tissue]][[3]]
  
  y.filtered.norm <- calcNormFactors(y.filtered, method = "TMM")
  
  ##### Transformations from the raw-scale to CPM
  y.filtered.norm.cpm <- cpm(y.filtered.norm, normalized.lib.sizes=TRUE, log=TRUE, prior.count=0.25)
  
  ref_datasets.list[[tissue]][[3]] <- y.filtered.norm.cpm
}
##### Principal component analysis (PCA)
##### Loop through combined datasets and perform PCA
for ( tissue in names(ref_datasets.list) ) {
  
  target <- ref_datasets.list[[tissue]][["sample_annot"]]
  data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
  
  ref_datasets.list[[tissue]][["pca"]] <- pca(data, target)
}
##### Loop through combined, BUT NOT PROCESSED, datasets and annotate ALL genes. This part is mainly required for biotype detection step
for ( tissue in names(ref_datasets.list) ) {
  
  ##### Convert data into a data frame to make the Ensembl ID and gene symbol matches (with merge function)
  data <- ref_datasets.list[[tissue]][["combined_data"]]
  data.df <- as.data.frame(cbind(rownames(data), data))
  colnames(data.df)[1] <- "ENSEMBL"

  ##### Get genes annotation and genomic locations (Use Ensembl annotation version 86 (Oct 2016), the most recent as of Oct 2018)
  edb <- EnsDb.Hsapiens.v86
  
  ##### Get keytypes for gene SYMBOL
  keys <- keys(edb, keytype="GENEID")
  
  ##### Get genes genomic coordiantes
  gene_info <- ensembldb::select(edb, keys=keys, columns=c("GENEID", "GENEBIOTYPE", "GENENAME", "SEQNAME", "GENESEQSTART", "GENESEQEND"), keytype="GENEID")
  names(gene_info) <- gsub("GENEID", "ENSEMBL", names(gene_info))
  names(gene_info) <- gsub("GENENAME", "SYMBOL", names(gene_info))
  
  ##### Limit genes annotation to those genes for which sample expression measurments are available
  gene_info <-  gene_info[ gene_info$ENSEMBL %in% data.df$ENSEMBL,  ]
  
  ##### Remove rows with duplicated ENSEMBL IDs
  gene_info = gene_info[!duplicated(gene_info$ENSEMBL),]
  rownames(gene_info) <- gene_info$ENSEMBL
  
  ##### Remove rows with duplicated gene symbols (Y_RNAs, SNORs, LINC0s etc)
  gene_info = gene_info[!duplicated(gene_info$SYMBOL),]
  
  ##### Merge genes genomic coordinates info with their annotation and expression data
  data.annot <- merge(gene_info, data.df, by = "ENSEMBL", all.x = FALSE)
  rownames(data.annot) <- data.annot$ENSEMBL
  
  ##### Get data matrix with gene symbols
  ref_datasets.list[[tissue]][["gene_annot_all"]] <- data.annot[, c("SYMBOL", "GENEBIOTYPE", "ENSEMBL", "SEQNAME", "GENESEQSTART", "GENESEQEND")]
}
##### Loop through combined datasets and annotate genes
for ( tissue in names(ref_datasets.list) ) {
  
  ##### Convert data into a data frame to make the Ensembl ID and gene symbol matches (with merge function)
  data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
  data.df <- as.data.frame(cbind(rownames(data), data))
  colnames(data.df)[1] <- "ENSEMBL"
  
  ##### Merge genes genomic coordinates info with their annotation and expression data
  data.annot <- merge(gene_info, data.df, by = "ENSEMBL", all.x = FALSE)
  
  ##### Keep only genes fo which gene symbol is available
  data.annot <- data.annot[!(is.na(data.annot$SYMBOL) | data.annot$SYMBOL==""), ]
  rownames(data.annot) <- data.annot$SYMBOL
  
  ##### Get data matrix with gene symbols
  #data <- data.annot[, colnames(data)]
  #data <- apply(data.annot[, colnames(data)], 2, as.numeric)
  #rownames(data) <- data.annot$SYMBOL
  ref_datasets.list[[tissue]][["combined_data_processed"]] <- apply(data.annot[, colnames(data)], 2, as.numeric)
  rownames(ref_datasets.list[[tissue]][["combined_data_processed"]]) <- data.annot$SYMBOL
  ref_datasets.list[[tissue]][["gene_annot"]] <- data.annot[, c("SYMBOL", "GENEBIOTYPE", "ENSEMBL", "SEQNAME", "GENESEQSTART", "GENESEQEND")]
}
##### Combine PMCC gene panel and OncoKB cancer genes
cancer_genes <- ref_genes.list[["oncokb_genes"]]
cancer_genes$PMCC <- rep("No", nrow(cancer_genes))

for ( gene in unlist(ref_genes.list[["genes_cancer"]]) ) {

  ##### Check if the PMCC genes is aleady reported in OncoKB
  if ( gene %in% cancer_genes$Hugo.Symbol ) {
   
    cancer_genes[ cancer_genes$Hugo.Symbol==gene, ]$PMCC <- "Yes"
    cancer_genes[ cancer_genes$Hugo.Symbol==gene, 2] <- as.numeric(cancer_genes[ cancer_genes$Hugo.Symbol==gene, 2]) + 1
    
  ##### Add if not present
  } else {
    
    cancer_genes <- rbind(cancer_genes, c(gene, 1, "No", rep("", 8), "Yes"))
  }
}

rownames(cancer_genes) <- cancer_genes$Hugo.Symbol
names(cancer_genes) <- c("Gene", "No. of resources", "OncoKB", "Oncogene", "TSG", "MSK-IMPACT", "MSK-HEME", "Foundation One", "Foundation One Heme", "Vogelstein", "Sanger CGC", "PMCC")
cancer_genes <- cancer_genes[,c("Oncogene", "TSG", "No. of resources", "OncoKB", "MSK-IMPACT", "MSK-HEME", "Foundation One", "Foundation One Heme", "Vogelstein", "Sanger CGC", "PMCC")]
cancer_genes[ cancer_genes=="No" ] <- "-"
cancer_genes[ cancer_genes=="" ] <- "-"

ref_genes.list[["oncokb_genes"]] <- cancer_genes
##### Combine expression data with mutation and CN data if available
if ( !is.null(ref_genes.list[["purple"]]) ) {
  
  cn_data <- ref_genes.list[["purple"]]
  expr_data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
  
  ##### Extract partient sample data
  expr_data <- expr_data[, ncol(expr_data)]
  
  ##### Perform Z-score transformation of the expression values
  expr_data.z <- as.vector(scale(expr_data))
  names(expr_data.z) <- names(expr_data)
    
  ##### Remove entries with missing gene symbol (mainly variants in intergenic regions)
  cn_data <- cn_data[ cn_data$Gene %!in% "", ]
  
  ##### Calculate the mean CN for each gene
  cn_data$MeanCopyNumber <- rowMeans(cbind(cn_data$MinCopyNumber, cn_data$MaxCopyNumber))
  
  ##### Deal with negative CN values
  cn_data$MeanCopyNumber[ cn_data$MeanCopyNumber < 0 ] <- 0
  
  ##### Get the percentiles from from the CN values
  #cn_data.percent <- quantile(cn_data$MeanCopyNumber, probs = seq(0, 1, .05), na.rm = TRUE)
  
  # ##### Draw histogram of CN data
  # p <- plot_ly(x = cn_data$MeanCopyNumber, type = 'histogram', name = "Copy-number data", width = 800, height = 500) %>%
  #   
  #   ##### Add 10th percentile threshold
  #   add_lines(y = seq(0,1000, 100), x = rep(cn_data.percent[2],11), 
  #               line = list(color = "black", dash = "dash"), opacity = 0.4,
  #               name = "10th percentile", showlegend = TRUE) %>%
  #   
  #   ##### Add 50th percentile
  #   add_lines(y = seq(0,1000, 100), x = rep(cn_data.percent[11],11), 
  #               line = list(color = "black", dash = "dash"), opacity = 0.7,
  #               name = "50th percentile", showlegend = TRUE) %>%
  #   
  #   ##### Add 90th percentile threshold
  #   add_lines(y = seq(0,1000, 100), x = rep(cn_data.percent[20],11), 
  #               line = list(color = "black", dash = "dash"), opacity = 1,
  #               name = "80th percentile", showlegend = TRUE) %>%
  #   
  #   layout(xaxis = list( range=c(0,3), title = "Copy-number values"), yaxis = list( title = "Frequency"), margin = list(l=50, r=50, b=50, t=50, pad=4), autosize = F)
  
 ##### Keep only altered genes with CN values below 10th percentile and above 90th percentile
  #cn_data <- cn_data[ cn_data$MaxCopyNumber < cn_data.percent[2] | cn_data$MinCopyNumber > cn_data.percent[20], ]

 ##### Keep only altered genes with CN values below loss threshold (default CN value = 1) and above gain threshold (default CN value = 4)
 cn_data <- cn_data[ cn_data$MeanCopyNumber < params$cn_loss | cn_data$MeanCopyNumber > params$cn_gain, ]
  
 ##### Add mutation data if available
  if ( !is.null(ref_genes.list[["pcgr"]]) ) {
  
    mut_data <- ref_genes.list[["pcgr"]]

    ##### Remove entries with missing gene symbol (mainly variants in intergenic regions)
    mut_data <- mut_data[ mut_data$SYMBOL %!in% "", ]
  
    ##### Prepare mutation data to include multiple mutations per gene
    ##### Initiate variable for the gene mutation status for each gene
    gene.mut <- as.matrix(rep("not mutated", length(expr_data.z)))
    colnames(gene.mut) <- "Mutation"
    rownames(gene.mut) <- names(expr_data.z)
  
    for ( i in 1:nrow(gene.mut) ) {
    
      ##### Check if any mutations are reported for each gene
      if (  rownames(gene.mut)[i] %in% mut_data$SYMBOL ) {
      
        ##### Deal with multiple mutations per gene
        if ( length(mut_data[ mut_data$SYMBOL %in% rownames(gene.mut)[i],  ]$CONSEQUENCE) > 1 ) {
  
          gene.mut[ rownames(gene.mut)[i],"Mutation" ] <- "multiple hits"
  
        } else {
        
          gene.mut[ rownames(gene.mut)[i],"Mutation" ] <- mut_data[ mut_data$SYMBOL %in% rownames(gene.mut)[i],  ]$CONSEQUENCE
        }
      }
    }
  
    ##### If there is no expression value for a specific gene than assume it's not expressed at all and assign the lowest value observed in that sample
    for ( gene in unique(mut_data$SYMBOL) ) {
    
      if ( gene %!in% rownames(gene.mut) ) {
          
        expr_data.z <- c(expr_data.z, min(expr_data.z))
        names(expr_data.z)[length(expr_data.z)] <- gene
      
        ##### Deal with multiple mutations per gene
        if ( length(mut_data[ mut_data$SYMBOL %in% gene,  ]$CONSEQUENCE) > 1 ) {
  
          gene.mut <- rbind( gene.mut,  "multiple hits")
  
        } else {
        
          gene.mut <- rbind( gene.mut,  mut_data[ mut_data$SYMBOL %in% gene,  ]$CONSEQUENCE )
        }
        rownames(gene.mut)[nrow(gene.mut)] <- gene
      }
    }

    ##### Subset expression, mutation and copy-number data to include only overlapping genes
    genes.intersect <- intersect(intersect(rownames(gene.mut), cn_data$Gene), names(expr_data.z))
    
    gene.mut.sub <- gene.mut[ rownames(gene.mut) %in% genes.intersect, ]
    cn_data.sub <- cn_data[ cn_data$Gene %in% genes.intersect, ]
    expr_data.z.sub <- expr_data.z[ names(expr_data.z) %in% genes.intersect ]
    
    ##### Make sure thay are all in the same order
    gene.mut.sub <- gene.mut.sub[ genes.intersect ]
    rownames(cn_data.sub) <- cn_data.sub$Gene
    cn_data.sub <- cn_data.sub[ genes.intersect,  ]
    expr_data.z.sub <- expr_data.z.sub[ genes.intersect  ]
    
    ##### Prepare data frame
    ref_datasets.list[[tissue]][["expr_mut_cn_data"]] <- data.frame(names(expr_data.z.sub), cn_data.sub$MeanCopyNumber, expr_data.z.sub, gene.mut.sub)
    colnames(ref_datasets.list[[tissue]][["expr_mut_cn_data"]]) <- c("Gene", "CN", "mRNA", "Mutation")
    
  } else {
    
    ##### Skip the step for processing mutation info and deal with expression and copy-number data
    ##### Subset expression and copy-number data to include only overlapping genes
    genes.intersect <- intersect(cn_data$Gene, names(expr_data.z))
    
    cn_data.sub <- cn_data[ cn_data$Gene %in% genes.intersect, ]
    expr_data.z.sub <- expr_data.z[ names(expr_data.z) %in% genes.intersect ]
    
    ##### Make sure thay are all in the same order
    rownames(cn_data.sub) <- cn_data.sub$Gene
    cn_data.sub <- cn_data.sub[ genes.intersect,  ]
    expr_data.z.sub <- expr_data.z.sub[ genes.intersect  ]
    
    ##### Prepare data frame
    ref_datasets.list[[tissue]][["expr_mut_cn_data"]] <- data.frame(names(expr_data.z.sub), cn_data.sub$MeanCopyNumber, expr_data.z.sub)
    colnames(ref_datasets.list[[tissue]][["expr_mut_cn_data"]]) <- c("Gene", "CN", "mRNA")
  }
}
#read in the pizzly fusion calls
pizzly.fusions <- read.table(file = paste(dataDir, "pizzly", paste0(sampleName.orig, "-flat.tsv"), sep = "/"), header = TRUE)
quant <- read.table(file = paste(dataDir, "kallisto/quant_pizzly_post/abundance.tsv", sep = "/"), header = TRUE)
#sort and filter quantification file on tpm values. First, grep only the transcript ids for fusion genes from quantification #file. Currently filtering on quantiles. Selected 0.997 because that reduces the #final fusion calls to the value we are #interested in (~15)
quant.fusions.only.transcripts <- quant[grep(":", quant$target_id), ]
quant.sorted.filtered <- dplyr::filter(arrange(quant.fusions.only.transcripts, desc(quant.fusions.only.transcripts$tpm)), tpm >= (quantile(quant.fusions.only.transcripts$tpm, 0.99)))

#initialize an empty dataframe
result <- data.frame()

#let's try using for loop for iterating over pizzly.fusions dataframe and get transcriptID and fusion gene pair information.
#can also filter quant.sorted.filtered$target_id to have only fusion gene target ids (that is two transcripts instead of one-
#this will increase speed

for (row in 1:nrow(pizzly.fusions)){
  y <- strsplit(as.character(pizzly.fusions[row, 7]), "\\;")
  y <- unname(y)
  for (i in 1:length(y[[1]])){
    if (y[[1]][i] %in% quant.sorted.filtered$target_id){
      #creating a new dataframe for the filtered pizzly results
      result <- rbind(result, data.frame(pizzly.fusions[row,]))
    }
  }
}

#remove duplicated values from result filtered using expression count (as multiple transcripts might support fusion between same gene) and sort the results by number of events (first by split count and then paircount)
deduped.result <- unique(result)
idx <- order(deduped.result$splitcount, deduped.result$paircount, decreasing = TRUE)
deduped.sorted.result <- deduped.result[idx, ]

#Extract only those fusion genes that are in cancer genes list
result.cancer_genes <- data.frame()
for (row in 1:nrow(pizzly.fusions)){
  if(pizzly.fusions[row,1] %in% rownames(ref_genes.list[["oncokb_genes"]]) | pizzly.fusions[row,3] %in% rownames(ref_genes.list[["oncokb_genes"]])) {
    #creating a new dataframe for extracting pizzly rows with cancer gene hits
    result.cancer_genes <- rbind(result.cancer_genes, data.frame(pizzly.fusions[row,]))
  }
}

#ordering pizzly's cancer genes results on the basis of read count values
idx2 <- order(result.cancer_genes$splitcount, result.cancer_genes$paircount, decreasing = TRUE)
result.sorted.cancer_genes <- result.cancer_genes[idx2,]

#extracting rows from pizzly results that are not in re-quant i.e. deduped.sorted.result or cancer genes list i.e. result.sorted.cancer_genes
result.other_genes <- pizzly.fusions[ rownames(pizzly.fusions) %!in% c(rownames(result.sorted.cancer_genes), rownames(deduped.sorted.result)), ]

#ordering pizzly's other genes results on the basis of read count values
idx3 <- order(result.other_genes$splitcount, result.other_genes$paircount, decreasing = TRUE)
result.sorted.other_genes <- result.other_genes[idx3,]

#combing all the three above sorted dataframes
pizzly.fusions2 <- rbind(deduped.sorted.result, result.sorted.cancer_genes, result.sorted.other_genes)

##### Flag known fusions based on info from Cancer Biomarkers database (https://www.cancergenomeinterpreter.org/biomarkers)
known_translocations <- caner_genes_annot.list[["cancer_biomarkers_trans"]]
known_translocations$cancer_acronym <- gsub(";", ", ", known_translocations$cancer_acronym)
known_translocations$source <- gsub(";", ", ", known_translocations$source)

##### Extract gene pairs involved in reported gene fusions
trans.pairs <-  strsplit(known_translocations$translocation, split='__', fixed=TRUE)
trans.pairs <- data.frame(matrix(unlist(trans.pairs), nrow=length(trans.pairs), byrow=TRUE), stringsAsFactors = FALSE)
names(trans.pairs) <- c("geneA", "geneB")
known_translocations <- cbind(known_translocations, trans.pairs)
trans.pairs <- apply( trans.pairs , 1 , paste , collapse = "" )
  
##### Add columns for info about reported fusions
pizzly.fusions2 <- cbind(pizzly.fusions2, data.frame(matrix("", ncol = 4, nrow = nrow(pizzly.fusions2)), stringsAsFactors = FALSE))
colnames(pizzly.fusions2)[-c(1:7)] <- c("Reported", "Effector_gene", "Source", "Cancer_acronym")

##### Add annotations about known fusion events
##### Loop through all genes involved in reported gene fusions and check if present in pizzly results
for ( i in 1:nrow(pizzly.fusions2) ) {
  
  geneA <- pizzly.fusions2$geneA.name[i]
  geneB <- pizzly.fusions2$geneB.name[i]
        
    ##### First check if the exact reported gene pairs were detected by pizzly
  if ( paste(geneA, geneB, sep="-") %in% trans.pairs ) {
    
    pizzly.fusions2$Reported[i] <- trans.pairs[ trans.pairs %in% paste(geneA, geneB, sep="-") ]
    
    pizzly.fusions2[ i, c("Effector_gene", "Cancer_acronym", "Source")] <- known_translocations[ trans.pairs %in% paste(geneA, geneB, sep="-"), c("effector_gene", "cancer_acronym", "source") ]
    
 } else if ( paste(geneB, geneA, sep="-") %in% trans.pairs ) {
    
    pizzly.fusions2$Reported[i] <- trans.pairs[ trans.pairs %in% paste(geneB, geneA, sep="-") ]
    
    pizzly.fusions2[ i, c("Effector_gene", "Cancer_acronym", "Source")] <- known_translocations[ trans.pairs %in% paste(geneB, geneA, sep="-"), c("effector_gene", "cancer_acronym", "source") ]
    
  ##### Now check is at lease one of the pizzly results genes was previously reporeted
  } else {
    ##### Check pizzly genes A and genes A in reported fusions
    if ( geneA %in% known_translocations$geneA ) {
      
      known_gene <- known_translocations$geneA %in% geneA
      
      pizzly.fusions2$Reported[i] <- unique(known_translocations$geneA[ known_gene ])
      
      pizzly.fusions2[ i, c("Cancer_acronym", "Source")] <- known_translocations[ known_gene, c("cancer_acronym", "source") ]
        
        ##### Flag if it's effector gene
        if ( geneA == known_translocations[ known_gene , "effector_gene" ]  ) {
          
          pizzly.fusions2[ i, "Effector_gene"] <- as.character(geneA)
        }
      
    ##### Check pizzly genes A and genes B in reported fusions
    } else if ( geneA %in% known_translocations$geneB ) {
      
      known_gene <- known_translocations$geneB %in% geneA
      
      pizzly.fusions2$Reported[i] <- unique(known_translocations$geneB[ known_gene ])
      
      pizzly.fusions2[ i, c("Cancer_acronym", "Source")] <- known_translocations[ known_gene, c("cancer_acronym", "source") ]
        
        ##### Flag if it's effector gene
        if ( geneA == known_translocations[ known_gene , "effector_gene" ]  ) {
          
          pizzly.fusions2[ i, "Effector_gene"] <- as.character(geneA)
        }
    }
    
    ##### Check pizzly genes B and genes A in reported fusions
    if ( geneB %in% known_translocations$geneA ) {
      
      known_gene <- known_translocations$geneA %in% geneB
      
      pizzly.fusions2$Reported[i] <- unique(known_translocations$geneA[ known_gene ])
      
      pizzly.fusions2[ i, c("Cancer_acronym", "Source")] <- known_translocations[ known_gene, c("cancer_acronym", "source") ]
        
        ##### Flag if it's effector gene
        if ( geneB == known_translocations[ known_gene , "effector_gene" ]  ) {
          
          pizzly.fusions2[ i, "Effector_gene"] <- as.character(geneB)
        }
      
    ##### Check pizzly genes B and genes B in reported fusions
    } else if ( geneB %in% known_translocations$geneB ) {
      
      known_gene <- known_translocations$geneB %in% geneB
      
      pizzly.fusions2$Reported[i] <- unique(known_translocations$geneB[ known_gene ])
      
      pizzly.fusions2[ i, c("Cancer_acronym", "Source")] <- known_translocations[ known_gene, c("cancer_acronym", "source") ]
        
        ##### Flag if it's effector gene
        if ( geneB == known_translocations[ known_gene , "effector_gene" ]  ) {
          
          pizzly.fusions2[ i, "Effector_gene"] <- as.character(geneB)
        }
    }
  }
}

##### Rearrange the table to move the transcripts list at the end
pizzly.fusions2 <- pizzly.fusions2 %>% dplyr::select(-"transcripts.list","transcripts.list")

##### Index duplicated rows = these are fusions which involve both highly abundant transcripts and cancer genes and so are duplicated in  pizzly.fusions2 data frame
dup.index <- which( duplicated(pizzly.fusions2) |  duplicated(pizzly.fusions2[nrow(pizzly.fusions2):1, ])[nrow(pizzly.fusions2):1] )

##### Add column indicating fusions containing high abundant transcripts and known cancer genes
fusions_abundant <- c(rep("-", nrow(pizzly.fusions2)))
fusions_abundant[ c(1:nrow(deduped.result)) ] <- "Yes"
fusions_cancer <- c(rep("-", nrow(pizzly.fusions2)))
fusions_cancer[ c((nrow(deduped.result)+1):(nrow(deduped.result) + nrow(result.cancer_genes))) ] <- "Yes"

pizzly.fusions3 <- cbind(pizzly.fusions2, fusions_abundant, fusions_cancer)

##### Indicate duplicated rows and flag them as both, those which involve both highly abundant transcripts and cancer genes
pizzly.fusions3$fusions_abundant[ dup.index  ] <- "Yes"
pizzly.fusions3$fusions_cancer[ dup.index ] <- "Yes"

##### Remove duplicated rows
if ( any(duplicated(pizzly.fusions3)) ) {
  
  pizzly.fusions3 <- pizzly.fusions3[ -c(which( duplicated(pizzly.fusions3) )), ]
}

##### Re-order data frame to prioritise fusions with highly abundant transcripts and cancer genes
pizzly.fusions3 <- pizzly.fusions3[ rev(with(pizzly.fusions3, order(fusions_abundant, fusions_cancer))), ]
##### Get the number of reads supporting fusions of interest
result_reads <- data.frame()

for (row in 1:nrow(deduped.result)){
  y <- strsplit(as.character(deduped.result[row, 7]), "\\;")
  y <- unname(y)
  for (i in 1:length(y[[1]])){
    if (y[[1]][i] %in% quant.sorted.filtered$target_id){
      #creating a new dataframe for the reads supporting individual transcript ID for each filtered fusion gene pair 
      result_reads_inter <- data.frame(deduped.result[row,1:6])
      result_reads_inter$transcriptID = y[[1]][i]
      tpm = quant.sorted.filtered[grep(y[[1]][i], quant.sorted.filtered$target_id), ]$tpm 
      result_reads_inter$tpm = tpm
      result_reads <- rbind(result_reads, result_reads_inter)
    }
  }
}

##### Rearrange the table to move the transcripts list at the end
result_reads <- result_reads %>% dplyr::select(-"transcriptID","transcriptID")
##### Annotate fusion genes
##### Get data to annotate fusion genes
fusion_genes_annot <- ref_datasets.list[[tissue]][["gene_annot_all"]][ , c("ENSEMBL", "SYMBOL", "SEQNAME", "GENESEQSTART", "GENESEQEND") ]

##### Keep only fusions for which both genes have gene symbol (and genomics location) available
pizzly.fusions3 <- pizzly.fusions3[ pizzly.fusions3$geneA.id %in% fusion_genes_annot$ENSEMBL, ]
pizzly.fusions3 <- pizzly.fusions3[ pizzly.fusions3$geneB.id %in% fusion_genes_annot$ENSEMBL, ]

pizzly.fusions.annot <- pizzly.fusions3
pizzly.fusions.annot$order <- 1:nrow(pizzly.fusions.annot)

##### Get genomic info for fusions genes
fusion_annot1 <- merge(fusion_genes_annot, pizzly.fusions.annot[ , c("geneA.id", "order")], by = 1, sort=FALSE)
fusion_annot1 <- fusion_annot1[ order(fusion_annot1$order), ]
fusion_annot2 <- merge(fusion_genes_annot, pizzly.fusions.annot[ , c("geneB.id", "order")], by = 1, sort=FALSE)
fusion_annot2 <- fusion_annot2[ order(fusion_annot2$order), ]

fusion_annot <- cbind(fusion_annot1, fusion_annot2, pizzly.fusions.annot$fusions_abundant, pizzly.fusions.annot$fusions_cancer)
colnames(fusion_annot) = make.names(colnames(fusion_annot), unique=TRUE)
##### Summarise the reference cohorts samples
target <- ref_datasets.list[[tissue]][["sample_annot"]]

ref_normal <- table(target$Target)[names(table(target$Target))==normal_group]
ref_cancer <- table(target$Target)[names(table(target$Target))==cancer_group]

Input data

Summary

The following reference patient cohorts were used for the analysis:

Out of the 52683 input genes:

  • 18304 genes have reliably detected expression
  • 34370 are either not expressed or their expression level is too low to be detected
  • 9 genes were ignored due to lack of HGNC-approved gene symbol

NOTE: the 34370 genes with no/low expression are indicated in BLANK cells with missing values in expression summary tables in Mutated genes, Copy-number altered genes, Cancer genes and Immune Response Markers sections.


- Biotype detection

Summary of biological classification of detected features in patient’s sample, as well as in samples from cancer patients and healthy individuals. In RNA-seq data it is expected that most of the genes will be protein-coding so detecting an enrichment of any other biotype could point to a potential abnormal contamination.

##### Use NOISeq package () https://bioconductor.org/packages/release/bioc/vignettes/NOISeq/inst/doc/NOISeq.pdf ) to perfrom biotype detection analysis
##### Get the data and genes annotation info
data <- ref_datasets.list[[tissue]][["combined_data"]]
targets <- ref_datasets.list[[tissue]][["sample_annot"]][, c("Dataset", "Target")]
targets$Target[targets$Target==normal_group] <- "Normal"
targets$Target[targets$Target==params$sample_name] <- "Patient"
targets$Target <- as.factor(targets$Target)

##### Use the genes annotation to define gene biotypes
data_annot <- ref_datasets.list[[tissue]][["gene_annot_all"]]

##### Keep only genes for which annotation is available
gene_biotypes <- data_annot$GENEBIOTYPE
names(gene_biotypes) <- rownames(data_annot)

##### Convert data into a NOISeq object
NOISeq_data <- readData( data = data, biotype = gene_biotypes, factors = targets)

##### Perfrom biotype detection analysis
NOISeq_biodetection <- NOISeq::dat(NOISeq_data, k = 0, type = "biodetection", factor = "Target")

##### Compute expression distribution per biotype
NOISeq_countsbio = NOISeq::dat(NOISeq_data, factor = "Target", type = "countsbio")

Per-sample

Biodetection plot

Proportion of various features detected within each sample/cohort.

##### Generate per-sample biodetection plot
for ( sample in c("Patient", cancer_group, "Normal") ) {

  explo.plot(NOISeq_biodetection, samples = sample, plottype = "persample", verbose=FALSE)
}

Plot legend The GRAY bar corresponds to the percentage of each biotype in the genome (i.e. in the whole set of features detected), the stripped RED colour bar is the proportion of detected biotypes in the sample, and the solid RED colour bar is the percentage of each biotype within the sample. The vertical GREEN line separates the most abundant biotypes (in the left-hand side, corresponding to the left axis scale) from the rest (in the right-hand side, corresponding to the right axis scale).


Features distribution

Distribution of expression values in each feature type within each sample/cohort.

##### Generate per-sample biodetection plot
for ( sample in c("Patient", cancer_group, "Normal") ) {
  
  explo.plot(NOISeq_countsbio, samples = sample, plottype = "boxplot", norm = FALSE)
}

Plots legend Box-plot illustrating the distribution of expression values (counts per million (CPM), y-axis) in each feature type. The number of detected features is presented in the upper side of the plot.


Cross-groups comparison

Proportion test for protein coding features to test if the relative abundance of that biotype is different in two samples/cohorts.

Patient vs Normal
##### Generate cross-groups biodetection plot (patient vs normal)
explo.plot(NOISeq_biodetection, samples = c(2, 1), plottype = "comparison", toplot = "protein_coding")

[1] "Percentage of protein_coding biotype in each sample:"
Patient  Normal 
50.4015 41.6282 
[1] "Confidence interval at 95% for the difference of percentages: Patient - Normal"
[1] 8.1718 9.3747
[1] "The percentage of this biotype is significantly DIFFERENT for these two samples (p-value = 1.953e-179 )."

Plots legend The RED and BLUE sets of bars correspond to the relative abundance of protein coding features in patient and normal samples, respectively


Patient vs Cancer
##### Generate cross-groups biodetection plot (patient vs cancer)
explo.plot(NOISeq_biodetection, samples = c(2, 3), plottype = "comparison", toplot = "protein_coding")

[1] "Percentage of protein_coding biotype in each sample:"
Patient    PDAC 
50.4015 39.6504 
[1] "Confidence interval at 95% for the difference of percentages: Patient - PDAC"
[1] 10.1519 11.3503
[1] "The percentage of this biotype is significantly DIFFERENT for these two samples (p-value = 2.225e-269 )."

Plots legend The RED and BLUE sets of bars correspond to the relative abundance of protein coding features in patient and cancer samples, respectively


Cancer vs Normal
##### Generate cross-groups biodetection plot (cancer vs normal)
explo.plot(NOISeq_biodetection, samples = c(3, 1), plottype = "comparison", toplot = "protein_coding")

[1] "Percentage of protein_coding biotype in each sample:"
   PDAC  Normal 
39.6504 41.6282 
[1] "Confidence interval at 95% for the difference of percentages: PDAC - Normal"
[1] -2.5728 -1.3830
[1] "The percentage of this biotype is significantly DIFFERENT for these two samples (p-value = 6.6e-11 )."

Plots legend The RED and BLUE sets of bars correspond to the relative abundance of protein coding features in cancer and normal samples, respectively


Mutated genes

mRNA expression levels of genes containing single nucleotide variants (SNVs) or insertions/deletions (indels), obtained from the PCGR report, in patient’s sample and their average mRNA expression in samples from cancer patients and healthy individuals. NOTE, only PCGR tier 1-3 variants are reported.

- Summary table

Out of the 3977 mutated genes 7 include tier 1-3 variants. Of these, the expression of 7 was reliably measured in patient’s sample. The remaining 0 genes are either not expressed or their expression level is too low to be detected (indicated in BLANK cells with missing values).

Z-scores

##### Generate expression summary table for cancer genes from OncoKB and PMCC
targets <- ref_datasets.list[[tissue]][["sample_annot"]]
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]

##### Consider only genes with mutations calssified within Tiers 1-3
genes <- unique(ref_genes.list[["pcgr"]][ ref_genes.list[["pcgr"]]$TIER %in% c("1", "2", "3" ), ]$SYMBOL)

mut_genes.expr.z <- exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], type = "z")

##### Present the expression summary table
mut_genes.expr.z[[1]]

Table legend The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each cancer gene. Variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided based on information from PCGR report. In case of multiple varaints detected in single gene the variant with the lowest tier is reported and other potential consequences are listed in column CONSEQUENCE_OTHER. Genes are ordered by increasing variants TIER. SD - standard deviation


Percentiles

mut_genes.expr.perc <- exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], type = "perc")

##### Present the expression summary table
mut_genes.expr.perc[[1]]

Table legend The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each cancer gene. Variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided based on information from PCGR report. In case of multiple varaints detected in single gene the variant with the lowest tier is reported and other potential consequences are listed in column CONSEQUENCE_OTHER. Genes are ordered by increasing variants TIER. SD - standard deviation


- Expression profiles

Expression profiles for the top 10 mutated genes with variants annotated with the lowest tier and demonstrating the greatest difference in standarised (Z-score) mRNA expression values between patient’s sample and the average mRNA expression in samples from healthy individuals.

KRAS

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[1]

if ( !is.na(gene) ) {
  ##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

CDKN2A

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[2]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

ROR2

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[3]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

PLEKHO1

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[4]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

  } else {
  cat(paste("There are only", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

KAT6A

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[5]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

RET

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[6]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

TP53

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[7]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

NA

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[8]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are only 7 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

NA

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[9]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are only 7 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

NA

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_genes.expr.z[[2]]$SYMBOL[10]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_genes.expr.z[[2]]$SYMBOL), "mutated genes that include tier 1-3 variants!", sep=" "))
}
There are only 7 mutated genes that include tier 1-3 variants!
##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Copy-number altered genes

Section overlaying the mRNA expression data with per-gene somatic copy-number (CN) data and, where available, mutation status.

- Expression vs copy-number

Scatterplot comparing the per-gene mRNA expression values (y-axis, Z-scores), CN values (x-axis, from PURPLE). If the mutation status information is available then the genes’s colours correspond to the variant(s) consequence (from PCGR). Genes with CN values > 10 or < 0.5 are annotated.

##### Generate scatterplot with per-gene expression values (y-axis), CN values (x-axis) and mutation status info (colours)
suppressMessages(library(plotly))

data <- ref_datasets.list[[tissue]][["expr_mut_cn_data"]]

if ( !is.null(ref_genes.list[["pcgr"]]) && !is.null(ref_genes.list[["purple"]]) ) {
  
  mutCNexprPlot(data = data, mut_data = TRUE, plot_mode = params$plots_mode)
  
} else if ( !is.null(ref_genes.list[["purple"]] ) ) {
  
  mutCNexprPlot(data = data, mut_data = FALSE, plot_mode = params$plots_mode)
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

- Summary table

Out of the 848 genes within gained (CN values > 4) or lossed (CN values < 1) regions the expression of 836 was reliably measured in patient’s sample. The remaining 12 genes are either not expressed or their expression level is too low to be detected (indicated in BLANK cells with missing values).

Gains

Table summarising the mRNA expression values in normal, cancer and patient samples for genes with CN values > 4 (gains), based on patient’s genomic data (from PURPLE), and mutation status if available (from PCGR).

Z-scores
##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours)
##### Keep only genes within CN gains
cn_data <- ref_datasets.list[[tissue]][["expr_mut_cn_data"]]
cn_data <- cn_data[ cn_data$CN > params$cn_gain, ]
cn_data <- cn_data[, "CN", drop=FALSE]

##### Get expression data
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
  
if ( !is.null(ref_genes.list[["pcgr"]]) && !is.null(ref_genes.list[["purple"]]) ) {
  
  mut_cn_expr_genes.expr.gains.z <- exprTable( genes = rownames(cn_data), data = data, cn_data = cn_data, cn_decrease = TRUE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], type = "z")

##### Generate expression summary table for per-gene expression values and CN values
} else if ( !is.null(ref_genes.list[["purple"]] ) ) {
  
  mut_cn_expr_genes.expr.gains.z <- exprTable( genes = rownames(cn_data), data = data, cn_data = cn_data, cn_decrease = TRUE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], type = "z")
}

##### Present the expression, CN and mutation data summary table
mut_cn_expr_genes.expr.gains.z[[1]]

Table legend The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each cancer gene. The CN values based on patient’s genomic data are presented in Patient (CN) column with a horizontal blue bar indicating the CN value of each gene in the context of other genes. If mutation data is availbale, then the variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided on right-hand side based on information from PCGR report (similar to Mutated genes section).Genes are ordered by Patient (CN) column. SD - standard deviation; CN - copy-number


Percentiles
##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours)
if ( !is.null(ref_genes.list[["pcgr"]]) && !is.null(ref_genes.list[["purple"]]) ) {
  
  mut_cn_expr_genes.expr.gains.perc <- exprTable( genes = rownames(cn_data), data = data, cn_data = cn_data, cn_decrease = TRUE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], type = "perc")

##### Generate expression summary table for per-gene expression values and CN values
} else if ( !is.null(ref_genes.list[["purple"]] ) ) {

  mut_cn_expr_genes.expr.gains.perc <- exprTable( genes = rownames(cn_data), data = data, cn_data = cn_data, cn_decrease = TRUE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], type = "perc")
}

##### Present the expression, CN and mutation data summary table
mut_cn_expr_genes.expr.gains.perc[[1]]

Table legend The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each cancer gene. The CN values based on patient’s genomic data are presented in Patient (CN) column with a horizontal blue bar indicating the CN value of each gene in the context of other genes. If mutation data is availbale, then the variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided on right-hand side based on information from PCGR report (similar to Mutated genes section). Genes are ordered by Patient (CN) column. SD - standard deviation; CN - copy-number


Losses

Table summarising the mRNA expression values in normal, cancer and patient samples for genes with CN values < 1 (losses), based on patient’s genomic data (from PURPLE), and mutation status if available (from PCGR).

Z-scores
##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours)
##### Keep only genes within CN losses
cn_data <- ref_datasets.list[[tissue]][["expr_mut_cn_data"]]
cn_data <- cn_data[ cn_data$CN < params$cn_loss, ]
cn_data <- cn_data[, "CN", drop=FALSE]

##### Get expression data
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
  
if ( !is.null(ref_genes.list[["pcgr"]]) && !is.null(ref_genes.list[["purple"]]) ) {
  
  mut_cn_expr_genes.expr.losses.z <- exprTable( genes = rownames(cn_data), data = data, cn_data = cn_data, cn_decrease = FALSE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], type = "z")

##### Generate expression summary table for per-gene expression values and CN values
} else if ( !is.null(ref_genes.list[["purple"]] ) ) {
  
  mut_cn_expr_genes.expr.losses.z <- exprTable( genes = rownames(cn_data), data = data, cn_data = cn_data, cn_decrease = FALSE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], type = "z")
}

##### Present the expression, CN and mutation data summary table
mut_cn_expr_genes.expr.losses.z[[1]]

Table legend The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each cancer gene. The CN values based on patient’s genomic data are presented in Patient (CN) column with a horizontal blue bar indicating the CN value of each gene in the context of other genes. If mutation data is availbale, then the variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided on right-hand side based on information from PCGR report (similar to Mutated genes section).Genes are ordered by Patient (CN) column. SD - standard deviation; CN - copy-number


Percentiles
##### Generate expression summary table for per-gene expression values CN values and mutation status info (colours)
if ( !is.null(ref_genes.list[["pcgr"]]) && !is.null(ref_genes.list[["purple"]]) ) {
  
  mut_cn_expr_genes.expr.losses.perc <- exprTable( genes = rownames(cn_data), data = data, cn_data = cn_data, cn_decrease = FALSE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], mut_annot = ref_genes.list[["pcgr"]][, c("SYMBOL", "TIER", "CONSEQUENCE", "VARIANT_CLASS", "AF_TUMOR", "GENOMIC_CHANGE", "PROTEIN_CHANGE")], type = "perc")

##### Generate expression summary table for per-gene expression values and CN values
} else if ( !is.null(ref_genes.list[["purple"]] ) ) {

  mut_cn_expr_genes.expr.losses.perc <- exprTable( genes = rownames(cn_data), data = data, cn_data = cn_data, cn_decrease = FALSE, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], type = "perc")
}

##### Present the expression, CN and mutation data summary table
mut_cn_expr_genes.expr.losses.perc[[1]]

Table legend The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each cancer gene. The CN values based on patient’s genomic data are presented in Patient (CN) column with a horizontal blue bar indicating the CN value of each gene in the context of other genes. If mutation data is availbale, then the variants’ tier, consequence, class and tumour allele freuqnecy (AF), as well as genomic and protein change are also provided on right-hand side based on information from PCGR report (similar to Mutated genes section). Genes are ordered by Patient (CN) column. SD - standard deviation; CN - copy-number


- Expression profiles

Expression profiles for 5 genes with the highest (gains) and 5 genes with the lowest (losses) CN values and the greatest difference in standarised (Z-score) mRNA expression values between patient’s sample and the average mRNA expression in samples from healthy individuals.

Gains

COCH
suppressMessages(library(plotly))

##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL[1]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL), "genes mapped to detected gained regions!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

STRN3
suppressMessages(library(plotly))

##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL[2]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL), "genes mapped to detected gained regions!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

HECTD1
suppressMessages(library(plotly))

##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL[3]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL), "genes mapped to detected gained regions!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

SCFD1
suppressMessages(library(plotly))

##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL[4]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL), "genes mapped to detected gained regions!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

AP4S1
suppressMessages(library(plotly))

##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL[5]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL), "genes mapped to detected gained regions!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Losses

SRSF10
suppressMessages(library(plotly))

##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL[1]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL), "genes mapped to detected lossed regions!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

RPGR
suppressMessages(library(plotly))

##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL[2]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL), "genes mapped to detected lossed regions!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

RAMP2
suppressMessages(library(plotly))

##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL[3]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL), "genes mapped to detected lossed regions!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

SIGLEC5
suppressMessages(library(plotly))

##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL[4]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL), "genes mapped to detected lossed regions!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

SIGLEC14
suppressMessages(library(plotly))

##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL[5]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL), "genes mapped to detected lossed regions!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Cancer genes

mRNA expression levels of cancer genes in patient’s sample and their average mRNA expression in samples from cancer patients and healthy individuals. These include Peter Mac comprehensive cancer (PMCC) panel genes and genes considered to be cancer genes (listed here and available here) by OncoKB based on their inclusion in the following sequencing panels: MSK-IMPACT, MSK-HEME, Foundation One, Foundation One Heme, Vogelstein and Sanger Cancer Gene Census (CGC).

- Summary table

Out of the 1410 cancer genes the expression of 1245 was reliably measured in patient’s sample. The remaining 165 genes are either not expressed or their expression level is too low to be detected (indicated in BLANK cells with missing values).

Z-scores

##### Generate expression summary table for cancer genes from OncoKB and PMCC
targets <- ref_datasets.list[[tissue]][["sample_annot"]]
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]

cancer_genes.expr.z <- exprTable( genes = rownames(ref_genes.list[["oncokb_genes"]]), data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]], type = "z")

##### Present the expression summary table
cancer_genes.expr.z[[1]]

Table legend The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each cancer gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, and inclusion in various sequencing panels are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


Percentiles

cancer_genes.expr.perc <- exprTable( genes = rownames(ref_genes.list[["oncokb_genes"]]), data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]], type = "perc")

##### Present the expression summary table
cancer_genes.expr.perc[[1]]

Table legend The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each cancer gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, and inclusion in various sequencing panels are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


- Expression profiles

Expression profiles for the top 10 altered cancer genes with the greatest difference in standarised (Z-score) mRNA expression values between patient’s sample and the average mRNA expression in samples from healthy individuals.

KMT2B

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[1]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

PRSS1

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[2]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

CTRC

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[3]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

ASMTL

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[4]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

P2RY8

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[5]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

PIK3R2

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[6]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

HIST1H1E

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[7]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

HOXA13

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[8]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

HIST1H3D

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[9]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

INHBA

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- cancer_genes.expr.z[[2]]$SYMBOL[10]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(cancer_genes.expr.z[[2]]$SYMBOL), "altered cancer genes detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Gene fusion events

Fusion events prioritisation

The prioritisation of pizzly results is performed in two steps.

  1. In the first step, basic idea is to run kallisto to quantify the fusion transcripts (reported by pizzly) and select those which have a decent Transcripts Per Kilobase Million (TPM) support.
  • Create a new index based on the transcriptome and the fusion transcripts identified by pizzly.
  • Run kallisto in normal quantification mode on the expanded index to quantify both normal transcripts and fusions.
  • Select only the fusions with a high TPM value as reported by previous step.
  1. In the second step, fusion genes reported by pizzly are compared with cancer gene list (from Peter Mac comprehensive cancer (PMCC) panel and OncoKB) and report the ones that are present in this list.

NOTE: Only fusion events including genes with available genomics coordinates in Ensembl database version 86 are reported.

- Summary

##### Remove some columns and rename column names for better presentation
pizzly.fusions.clean <- pizzly.fusions3[ , names(pizzly.fusions3) %!in% c("geneA.id", "geneB.id", "transcripts.list") ]
pizzly.fusions.clean <- pizzly.fusions.clean[ , c("geneA.name", "geneB.name", "paircount", "splitcount", "fusions_abundant", "fusions_cancer", "Reported", "Effector_gene", "Source", "Cancer_acronym") ]

##### Create a nice table output (with dataTable)
names(pizzly.fusions.clean) <- c("Gene A", "Gene B", "Pair count", "Split count", "Abundant transcript(s)", "Cancer gene(s)", "Fusion gene(s)", "Effector gene", "Source", "Cancer acronym")

DT::datatable( data = pizzly.fusions.clean, filter="none", rownames = FALSE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800,  escape = FALSE) %>%
    DT::formatStyle( columns = names(pizzly.fusions.clean), `font-size` = '12px', 'text-align' = 'center' ) %>%
  
    ##### Highlight rows with fusions involving hihgly abundant transcripts (red) or cancer genes (blue)
    DT::formatStyle( columns = colnames(pizzly.fusions.clean) %in% "Abundant transcript(s)", backgroundColor = DT::styleEqual(c("-", "Yes"), c('transparent', 'coral')) ) %>%
    DT::formatStyle( columns = colnames(pizzly.fusions.clean) %in% "Cancer gene(s)", backgroundColor = DT::styleEqual(c("-", "Yes"), c('transparent', 'lightblue')) )

Table legend Cell in RED indicate highly abundant fusion transcript(s) and those hihglighted in BLUE indicate fusions containing Cancer genes. Genes known to be involved in gene fusions are flagged based on information provided in Cancer Genome Interpreter (CGI) database.

Abundant transcript(s): gene fusion events involving highly abundant transcript(s)

Cancer gene(s): gene fusion events involving Cancer genes

Fusion gene(s): gene(s) known to be involved in tumorigenesis across cancer types via translocation

Effector gene: gene most likely to promote the tumorigenesis between the two translocation partners

Source: source from which the information has been extracted

  • biomarker: gene with mutations known to be biomarkers of response to drugs
  • cgc: gene obtained from the Cancer Gene Census
  • in silico predicted: gene identified by an ensemble of bioinformatics tools to have signals of positive selection in its mutational pattern across tumors (obtained from the IntOGen repository)
  • predisposing: gene with clinical evidence of predisposing to cancer (obtained from the literature)
  • validated: experimentally validated cancer driver gene (obtained from the literature)

Cancer acronym: acronym of cancer type in which the fusion event was detected and reported

AA adrenal adenoma, ABC aneurysmal bone cyst, AC adrenal cortex, ACA adrenal cortex adenoma, ACC adrenal cortex carcinoma, ACML atypical chronic myeloid leukemia, AEL acute erythroid leukemia, ALCL anaplastic large cell lymphoma, ALL acute lymphoblastic leukemia, ALM acral lentigious melanoma, AML acute myeloid lekemia, APML acute promyelocytic leukemia, AS angiosarcoma, B brain, BCC basal cell carcinoma, BCL b cell lymphoma, BLCA bladder , BLTC bladder transitional cell, BLY burkitt lymphoma, BRCA breast , BRCA/OV-PR breast/ovary predisposing, BRCAL breast lobular, BRCALU breast luminal, BT billiary tract, CANCER cancer, CANCER-PR cancer-predisposing, CCS clear cell sarcoma, CER cervix, CESC cervix squamous cell, CH cholangiocarcinoma, CLL chronic lymphocytic leukemia, CM cutaneous melanoma, CM-PR cutaneous melanoma – predisposing, CML chronic myeloid lekemia, CMP chronic myeloproliferative neoplasm, CNL chronic neutrophilic leukemia, CNS central nervous system, CNSLY central nervous system lymphoma, COC colon carcinoma, COC-PR colon carcinoma – predisposing, COCA colon adenocarcinoma, COREAD colorectal adenocarcinoma, COREAD-PR colorectal adenocarcinoma – predisposing, CS chondrosarcoma, CTCL cutaneous T-cell lymphoma, DEST desmoid tumor, DFS dermatofibrosarcoma, DLBCL diffuse large B cell lymphoma, DNT dysembryoplastic neuroephitelial tumor, DSRCT desmoplastic small round cell tumor, ECL eosinophilic chronic leukemia, ED endometrium, EDA endometrial adenocarcinoma, EDS endometrial stromal, ES endocrine system, ESCA esophagous, ESCC esophagous clear cell, ESSC esophagous squamous cell, EWS ewing sarcoma, FGCT female germ cell tumor, FH fibrous histiocytoma, FIBS-PR fibrosarcoma - predisposing, FL follicular lymphoma, G glioma, GB glioblastoma, GBM glioblastoma multiforme, GIST gastrointestinal stromal, GIST-PR gastrointestinal stromal – predisposing, HA hepatic adenoma, HB hepatic blastoma, HC hepatic carcinoma, HC-PR hepatic carcinoma – predisposing, HCL hairy cell leukemia, HEMATO hematological malignancy, HEMATO-PR hematological malignancy – predisposing, HES hyper eosinophilic advanced syndrome, HGP hemangiopericytoma, HISEC erdheim-chester histiocytosis, HISLC largerhans cell histiocytosis, HLY hodgkin lymphoma, HNC head and neck, HNSC head and neck squamous, IM immflamatory myofibroblastic, L lung, LBCL large b-cell lymphoma, LGG lower grade glioma, LGLK large granular leukemia, LIP liposarcoma, LK leukemia, LUAD lung adenocarcinoma, LUSC lung squamous cell, LY luymphoma, MA malignant astrocytoma, MALT malt lymphoma, MAS mastocytosis, MB medulloblastoma, MCL mantle cell lymphoma, MCLK mast cell leukemia, MDPS myelodisplatic proliferative syndrome, MDS myelodisplasicsyndrome, MEN meningioma, MERC merkel cell carcinoma, MESO mesothelioma, MFH myxoidfibrosarcoma, MGCT male germ cell tumor, MKB megakaryoblastic leukemia, MM multiple myeloma, MML myelomonocytic leukemia, MPN malignant peripheral nerve sheat tumor, MRT malignant rhaboid tumor, MRT-PR malignant rhaboid tumor – predisposing, MUCM mucosal melanoma, MY myelofibrosis, MYEP myoepithelioma, MYMA myeloma, MYX myxoma, NB neuroblastoma, NF neurofibromatosis, NHLY non-hodgkin lymphoma, NMC nut midline carcinoma, NSCLC non-small cell lung, NSMGCT non-seminomatous germ cell tumor, NSPH nasopharyngeal, OCSC oral cavity squamous cell, OPH ororpharynx squamous cell, OS osteosarcoma, OV ovary, OVCC ovary clear cell, OVE ovary ephitelial cell, OVG ovary granulosa cell, OVSC ovary small cell, PA pancreas, PAAC pancreas acinar, PAAD pancreas adenocarcinoma, PAIS pancreas islet, PANC pancreas neuroendocrine, PARG paraganglioma, PATH parathyroid adenoma, PCPG pheochromocytoma and paranganglioma, PEC perivascular epithelioid cell, PG pediatric glioma, PHEO pheochromacytoma, PIA pilocytic astrocytoma, PIGA pituitary gland adenoma, PPB pleuropulmonary blastoma, PRAD prostate adenocarcinoma, PTCL perypheral t-cell lymphoma, PV polycitemia vera, R renal, R-PR renal – predisposing, RB retinoblastoma, RCCC renal clear cell, REC rectal carcinoma, RHBDS rhabdomyosarcoma, RPC renal papillary cell, S sarcoma, SCC squamous cell carcinoma, SCC-PR squamous cell carcinoma – predisposing, SCGS sex-cord gonadal stromal, SCHW schwannoma, SCLC small cell lung, SG salivary glands, SGA salivary glands adenoma, SGM salivary glands mucoepidermoid, SIN small intestine, SINNE small intestine neuroendocrine, SK skin, SK-PR skin – predisposing, SM systemic mastocytosis, SOLID solid tumors, SOLID-PR solid tumors - predisposing , ST stomach, ST-PR stomach – predisposing, STAD stomach adenocarcinoma, STS soft tissue sarcoma, SYNS synovial sarcoma, T testis, TCALL t cell acute lymphoblastic leukemia, TCPL t cell prolymphocytic leukemia, TER teratoma, TH thyroid, THA thyroid adenoma, THCA thyroid carcinoma, THF thyroid follicular, THM thyroid medullary, THYM thymic, TOSC tongue squamous cell, UCEC uterine corpus endometroid carcinoma, UCS uterine carcinosarcoma, UTH urothelial, UVM uveal melanoma, WM waldenstrom macroglobullinemia, WT wilms tumor


Genomic view

Gene fusion events involving highly abundant transcript(s) or Cancer genes are presented in the genomic context. The table at the bottom contains genomic coordingates of individual fusion genes sorted based on their genomic location.

##### Keep only fusions with abundant transcript(s) or cancer gene(s) involved
fusion_annot_top <- fusion_annot[ fusion_annot$pizzly.fusions.annot.fusions_abundant == "Yes" | fusion_annot$pizzly.fusions.annot.fusions_cancer == "Yes" , ]

##### Generate bezier curves-like plot representing gene fusion events
fusionsBezierPlot(fusion_annot_top, ref_datasets.list[[tissue]][["gene_annot_all"]])

##### Clean the table for better presentation
fusion_annot_top.clean <- fusion_annot_top[, c("SYMBOL", "SEQNAME", "GENESEQSTART", "GENESEQEND", "SYMBOL.1", "SEQNAME.1", "GENESEQSTART.1", "GENESEQEND.1", "pizzly.fusions.annot.fusions_abundant", "pizzly.fusions.annot.fusions_cancer") ]

##### Order fusions based on the genomic location (chrom and start positions)
chrOrder <-c((1:22),"X","Y","M")

fusion_annot_top.clean$SEQNAME <- factor(fusion_annot_top.clean$SEQNAME, chrOrder, ordered=TRUE)
fusion_annot_top.clean$SEQNAME.1 <- factor(fusion_annot_top.clean$SEQNAME.1, chrOrder, ordered=TRUE)
fusion_annot_top.clean <- fusion_annot_top.clean[do.call(order, fusion_annot_top.clean[, c("SEQNAME", "SEQNAME.1", "GENESEQSTART", "GENESEQSTART.1")]), ]

names(fusion_annot_top.clean) <- c("Gene A", "Chrom (A)", "Start (A)", "End (A)", "Gene B", "Chrom (B)", "Start (B)", "End (B)", "Abundant transcript(s)", "Cancer gene(s)")

DT::datatable( data = fusion_annot_top.clean, filter="none", rownames = FALSE, extensions = c('Buttons','Scroller'), options = list(pageLength = 10, dom = 'Bfrtip', buttons = c('excel', 'csv', 'pdf','copy','colvis'), scrollX = TRUE, deferRender = TRUE, scrollY = 200, scroller = TRUE), width = 800,  escape = FALSE) %>%
    DT::formatStyle( columns = names(fusion_annot_top.clean), `font-size` = '12px', 'text-align' = 'center' ) %>%
  
    ##### Highlight rows with fusions involving hihgly abundant transcripts (red) or cancer genes (blue)
    DT::formatStyle( columns = colnames(fusion_annot_top.clean) %in% "Abundant transcript(s)", backgroundColor = DT::styleEqual(c("-", "Yes"), c('transparent', 'coral')) ) %>%
    DT::formatStyle( columns = colnames(fusion_annot_top.clean) %in% "Cancer gene(s)", backgroundColor = DT::styleEqual(c("-", "Yes"), c('transparent', 'lightblue')) )

- Fusion genes

Expression profiles for the top 5 gene fusion events detected in patient’s sample and listed within red (abundant transcripts and blue (cancer genes) sections in the Gene fusion events table).

NOTE: the Fusion events visualisation is not available for gene fusions for which no junctions were found by clinker.

MARCH3-C5orf63

Fusion events visualisation
##### Convert the fusion image from pdf to png and present it in the report
geneA <- pizzly.fusions2$geneA.name[1]
geneB <- pizzly.fusions2$geneB.name[1]

##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists(paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
  
  fusion_png(geneA, geneB, paste(dataDir, "clinker", sep="/") )
}
##### Present a table with number of reads supporting this fusion
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {

  fusion_result_reads <- result_reads[ result_reads$geneA.name == geneA & result_reads$geneB.name == geneB, ]

  if( nrow(fusion_result_reads) !=0 ) {
    kable(fusion_result_reads, row.names = FALSE, caption = paste0("Number of reads supporting ", paste(geneA, geneB, sep="-"), " fusion")) %>%
    kable_styling(font_size = 12, "striped", "bordered") %>%
    scroll_box(width = "100%")
  }
}

Fusion genes expression

mRNA expression levels of fusion genes detected in patient’s sample and their average mRNA expression (Z-score) in samples from cancer patients and healthy individuals.

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
targets <- ref_datasets.list[[tissue]][["sample_annot"]]
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]

cdfPlot(gene = geneA, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)

cdfPlot(gene = geneB, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Z-scores
fusion.df <- ref_datasets.list[[tissue]][["gene_annot"]]
fusion.df <- rbind(fusion.df[fusion.df$SYMBOL==geneA, ], fusion.df[fusion.df$SYMBOL==geneB, ])

exprTable( genes = fusion.df$SYMBOL, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c(1,2,4)], type = "z")[[1]]

Table legend The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


Percentiles
exprTable( genes = fusion.df$SYMBOL, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c(1,2,4)], type = "perc")[[1]]

Table legend The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


COL3A1-COL1A2

Fusion events visualisation
##### Convert the fusion image from pdf to png and present it in the report
geneA <- pizzly.fusions2$geneA.name[2]
geneB <- pizzly.fusions2$geneB.name[2]

##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
  
  fusion_png(geneA, geneB, paste(dataDir, "clinker", sep="/") )
}
##### Present a table with number of reads supporting this fusion
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
  
  fusion_result_reads <- result_reads[ result_reads$geneA.name == geneA & result_reads$geneB.name == geneB, ]
  
  if( nrow(fusion_result_reads) !=0 ) {
    kable(fusion_result_reads, row.names = FALSE, caption = paste0("Number of reads supporting ", paste(geneA, geneB, sep="-"), " fusion")) %>%
    kable_styling(font_size = 12, "striped", "bordered") %>%
    scroll_box(width = "100%")
  }
}

Fusion genes expression

mRNA expression levels of fusion genes detected in patient’s sample and their average mRNA expression (Z-score) in samples from cancer patients and healthy individuals.

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
cdfPlot(gene = geneA, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)

cdfPlot(gene = geneB, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Z-scores
fusion.df <- ref_datasets.list[[tissue]][["gene_annot"]]
fusion.df <- rbind(fusion.df[fusion.df$SYMBOL==geneA, ], fusion.df[fusion.df$SYMBOL==geneB, ])

exprTable( genes = fusion.df$SYMBOL, data = data, targets = targets, sampleName  = params$sample_name, normal  = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes  = ref_genes.list[["oncokb_genes"]][,c(1,2,4)], type = "z")[[1]]

Table legend The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


Percentiles
exprTable( genes = fusion.df$SYMBOL, data = data, targets = targets, sampleName  = params$sample_name, normal  = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes  = ref_genes.list[["oncokb_genes"]][,c(1,2,4)], type = "perc")[[1]]

Table legend The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


COL1A1-COL3A1

Fusion events visualisation
##### Convert the fusion image from pdf to png and present it in the report
geneA <- pizzly.fusions2$geneA.name[3]
geneB <- pizzly.fusions2$geneB.name[3]

##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
  
  fusion_png(geneA, geneB, paste(dataDir, "clinker", sep="/") )
}
##### Present a table with number of reads supporting this fusion
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
  
  fusion_result_reads <- result_reads[ result_reads$geneA.name == geneA & result_reads$geneB.name == geneB, ]
  
  if( nrow(fusion_result_reads) !=0 ) {
    kable(fusion_result_reads, row.names = FALSE, caption = paste0("Number of reads supporting ", paste(geneA, geneB, sep="-"), " fusion")) %>%
    kable_styling(font_size = 12, "striped", "bordered") %>%
    scroll_box(width = "100%")
  }
}

Fusion genes expression

mRNA expression levels of fusion genes detected in patient’s sample and their average mRNA expression (Z-score) in samples from cancer patients and healthy individuals.

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
cdfPlot(gene = geneA, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)

cdfPlot(gene = geneB, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Z-scores
fusion.df <- ref_datasets.list[[tissue]][["gene_annot"]]
fusion.df <- rbind(fusion.df[fusion.df$SYMBOL==geneA, ], fusion.df[fusion.df$SYMBOL==geneB, ])

exprTable( genes = fusion.df$SYMBOL, data = data, targets = targets, sampleName  = params$sample_name, normal  = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes  = ref_genes.list[["oncokb_genes"]][,c(1,2,4)], type = "z")[[1]]

Table legend The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


Percentiles
exprTable( genes = fusion.df$SYMBOL, data = data, targets = targets, sampleName  = params$sample_name, normal  = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes  = ref_genes.list[["oncokb_genes"]][,c(1,2,4)], type = "perc")[[1]]

Table legend The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


EIF4A2-PTMA

Fusion events visualisation
##### Convert the fusion image from pdf to png and present it in the report
geneA <- pizzly.fusions2$geneA.name[4]
geneB <- pizzly.fusions2$geneB.name[4]

##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists(paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
  
  fusion_png(geneA, geneB, paste(dataDir, "clinker", sep="/") )
}

##### Present a table with number of reads supporting this fusion
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {

  fusion_result_reads <- result_reads[ result_reads$geneA.name == geneA & result_reads$geneB.name == geneB, ]

  if( nrow(fusion_result_reads) !=0 ) {
    kable(fusion_result_reads, row.names = FALSE, caption = paste0("Number of reads supporting ", paste(geneA, geneB, sep="-"), " fusion")) %>%
    kable_styling(font_size = 12, "striped", "bordered") %>%
    scroll_box(width = "100%")
  }
}

Fusion genes expression

mRNA expression levels of fusion genes detected in patient’s sample and their average mRNA expression (Z-score) in samples from cancer patients and healthy individuals.

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
cdfPlot(gene = geneA, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)

cdfPlot(gene = geneB, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Z-scores
fusion.df <- ref_datasets.list[[tissue]][["gene_annot"]]
fusion.df <- rbind(fusion.df[fusion.df$SYMBOL==geneA, ], fusion.df[fusion.df$SYMBOL==geneB, ])

exprTable( genes = fusion.df$SYMBOL, data = data, targets = targets, sampleName  = params$sample_name, normal  = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes  = ref_genes.list[["oncokb_genes"]][,c(1,2,4)], type = "z")[[1]]

Table legend The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


Percentiles
exprTable( genes = fusion.df$SYMBOL, data = data, targets = targets, sampleName  = params$sample_name, normal  = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes  = ref_genes.list[["oncokb_genes"]][,c(1,2,4)], type = "perc")[[1]]

Table legend The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The Patient vs Normal column illustrates the difference between percentile in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


FUBP1-HDLBP

Fusion events visualisation
##### Convert the fusion image from pdf to png and present it in the report
geneA <- pizzly.fusions2$geneA.name[5]
geneB <- pizzly.fusions2$geneB.name[5]

##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists(paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {
  
  fusion_png(geneA, geneB, paste(dataDir, "clinker", sep="/") )
}

##### Present a table with number of reads supporting this fusion
##### Check if clinker fusion plot exists. Skip this section in it doesn't
if ( file.exists( paste0(dataDir, "/clinker/", geneA, "_", geneB, ".pdf")) ) {

  fusion_result_reads <- result_reads[ result_reads$geneA.name == geneA & result_reads$geneB.name == geneB, ]

  if( nrow(fusion_result_reads) !=0 ) {
    kable(fusion_result_reads, row.names = FALSE, caption = paste0("Number of reads supporting ", paste(geneA, geneB, sep="-"), " fusion")) %>%
    kable_styling(font_size = 12, "striped", "bordered") %>%
    scroll_box(width = "100%")
  }
}

Fusion genes expression

mRNA expression levels of fusion genes detected in patient’s sample and their average mRNA expression (Z-score) in samples from cancer patients and healthy individuals.

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
cdfPlot(gene = geneA, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)

cdfPlot(gene = geneB, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = FALSE, plot_mode = params$plots_mode)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Z-scores
fusion.df <- ref_datasets.list[[tissue]][["gene_annot"]]
fusion.df <- rbind(fusion.df[fusion.df$SYMBOL==geneA, ], fusion.df[fusion.df$SYMBOL==geneB, ])

exprTable( genes = fusion.df$SYMBOL, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c(1,2,4)], type = "z")[[1]]

Table legend The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


Percentiles
exprTable( genes = fusion.df$SYMBOL, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], OncoKB_genes = ref_genes.list[["oncokb_genes"]][,c(1,2,4)], type = "perc")[[1]]

Table legend The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each fusion gene. Genes considered to be oncogenes or tumour suppressor genes, according to OncoKB database, are also indicated. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


Immune Response Markers

Section presenting expression levels for immune response markers to assess pre-existing anti-cancer immunity and likelihood of response to immunotherapy)

Out of the 53 immune response markers the expression of 36 was reliably measured in patient’s sample. The remaining 17 genes are either not expressed or their expression level is too low to be detected (indicated in BLANK cells with missing values).

- Summary table

Z-scores

##### Generate expression summary table for cancer genes from OncoKB and PMCC
targets <- ref_datasets.list[[tissue]][["sample_annot"]]
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genes <- unique(unlist(ref_genes.list[["genes_immune"]]))

immune_genes.expr.z <- exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], type = "z")

##### Present the expression summary table
immune_genes.expr.z[[1]]

Table legend The RED colour range indicate relatively high expression (Z-score) values and BLUE colour range indicate relatively low expression (Z-score) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between Z-scores in patient sample and cohort of healthy individuals for each cancer gene. Genes are ordered by decreasing absolute values in the Patient vs Normal column. SD - standard deviation


Percentiles

##### Generate expression summary table for cancer genes from OncoKB and PMCC
immune_genes.expr.perc <- exprTable( genes = genes, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group , cancer = cancer_group, genes_annot = ref_datasets.list[[tissue]][["gene_annot_all"]][, c("SYMBOL", "ENSEMBL")], type = "perc")

##### Present the expression summary table
immune_genes.expr.perc[[1]]

Table legend The RED colour range indicate relatively high expression (percentile) values and BLUE colour range indicate relatively low expression (percentile) values in individual sample group. The BLANK cells with missing values indicate genes with no/low expression. The Patient vs Normal column illustrates the difference between percentiles in patient sample and cohort of healthy individuals for each cancer gene. Genes are ordered by decreasing absolute values in the Patient vs Normal column. TSG - tumour suppressor gene; SD - standard deviation


- Expression profiles

Expression profiles for the top 10 immune response markers with the greatest difference in standarised (Z-score) mRNA expression values between patient’s sample and the average mRNA expression in samples from healthy individuals.

CD68

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[1]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

CD28

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[2]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

ICOS

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[3]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

BTLA

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[4]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

CTLA4

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[5]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

CXCL10

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[6]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

IDO1

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[7]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

CD2

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[8]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

CD40LG

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[9]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

CXCR6

suppressMessages(library(plotly))
##### Generate empirical cumulative distribution function (ECDF) plot illustrating mRNA expression level the genes of interest in the context of the overall mRNA expression distribution
gene <- immune_genes.expr.z[[2]]$SYMBOL[10]

##### Generate CDF plot and add boxplot below to show the data variance for selected gene in individual groups
if ( !is.na(gene) ) {
  cdfPlot(gene = gene, data = data, targets = targets, sampleName = params$sample_name, normal = normal_group, cancer = cancer_group, addBoxPlot = TRUE, plot_mode = params$plots_mode)

} else {
  cat(paste("There are only", length(immune_genes.expr.z[[2]]$SYMBOL), "immune response marker(s) detected!", sep=" "))
}

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Drug matching

List of suitable drugs based on actionable targets identified among Mutated genes, Copy-number altered genes, dysregulated Cancer genes and Gene fusion events, which can be considered in the treatment decision making process. The clinically actionable aberrations are matched based on information provided by clinical interpretations of variants in Cancer (CIViC) (Griffith et al. (2017)).

- Mutated genes

Genes with PCGR tier 1-3 variants were screened for suitable drugs (see Mutated genes section).

Predictive

Evidence pertaining to a variant’s effect on therapeutic response.

##### Generate table with drugs targeting mutated cancer genes
genes <- mut_genes.expr.z[[2]]$SYMBOL

civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predictive")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Prognostic

Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.

##### Generate table with drugs targeting mutated cancer genes
civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Prognostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Diagnostic

Evidence pertaining to a variant’s impact on patient diagnosis.

##### Generate table with drugs targeting mutated cancer genes
civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Diagnostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Predisposing

Evidence pertains to a variant’s role in conferring susceptibility to a disease.

##### Generate table with drugs targeting mutated cancer genes
civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predisposing")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

- CN altered genes

Genes with CN values > 4 (gains) and or < 1 (losses) were screened for suitable drugs (see Copy-number altered genes section).

Gains

Predictive

Evidence pertaining to a variant’s effect on therapeutic response.

##### Generate table with drugs targeting CN altered genes
genes <- mut_cn_expr_genes.expr.gains.z[[2]]$SYMBOL

civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predictive")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Prognostic

Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.

##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Prognostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Diagnostic

Evidence pertaining to a variant’s impact on patient diagnosis.

##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Diagnostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Predisposing

Evidence pertains to a variant’s role in conferring susceptibility to a disease.

##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predisposing")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Losses

Predictive

Evidence pertaining to a variant’s effect on therapeutic response.

##### Generate table with drugs targeting CN altered genes
genes <- mut_cn_expr_genes.expr.losses.z[[2]]$SYMBOL

civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predictive")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Evidence Type

  • Predictive: Evidence pertaining to a variant’s effect on therapeutic response
  • Diagnostic: Evidence pertaining to a variant’s impact on patient diagnosis
  • Prognostic: Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival
  • Predisposing: Evidence pertains to a variant’s role in conferring susceptibility to a disease

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Prognostic

Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.

##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Prognostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Evidence Type

  • Predictive: Evidence pertaining to a variant’s effect on therapeutic response
  • Diagnostic: Evidence pertaining to a variant’s impact on patient diagnosis
  • Prognostic: Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival
  • Predisposing: Evidence pertains to a variant’s role in conferring susceptibility to a disease

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Diagnostic

Evidence pertaining to a variant’s impact on patient diagnosis.

##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Diagnostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Evidence Type

  • Predictive: Evidence pertaining to a variant’s effect on therapeutic response
  • Diagnostic: Evidence pertaining to a variant’s impact on patient diagnosis
  • Prognostic: Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival
  • Predisposing: Evidence pertains to a variant’s role in conferring susceptibility to a disease

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Predisposing

Evidence pertains to a variant’s role in conferring susceptibility to a disease.

##### Generate table with drugs targeting CN altered genes
civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predisposing")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Evidence Type

  • Predictive: Evidence pertaining to a variant’s effect on therapeutic response
  • Diagnostic: Evidence pertaining to a variant’s impact on patient diagnosis
  • Prognostic: Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival
  • Predisposing: Evidence pertains to a variant’s role in conferring susceptibility to a disease

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

- Cancer genes

The top 50 cancer genes with the greatest difference in standarised (Z-score) mRNA expression values between patient’s sample and the average mRNA expression in samples from healthy individuals were screened for suitable drugs (see Cancer genes section).

Predictive

Evidence pertaining to a variant’s effect on therapeutic response.

##### Generate table with drugs targeting dysregulated cancer genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genes <- cancer_genes.expr.z[[2]]$SYMBOL[1:50]

civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predictive")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Prognostic

Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.

##### Generate table with drugs targeting dysregulated cancer genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genes <- cancer_genes.expr.z[[2]]$SYMBOL[1:50]

civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Prognostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Diagnostic

Evidence pertaining to a variant’s impact on patient diagnosis.

##### Generate table with drugs targeting dysregulated cancer genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genes <- cancer_genes.expr.z[[2]]$SYMBOL[1:50]

civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Diagnostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Predisposing

Evidence pertains to a variant’s role in conferring susceptibility to a disease.

##### Generate table with drugs targeting dysregulated cancer genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genes <- cancer_genes.expr.z[[2]]$SYMBOL[1:50]

civicDrugTable(genes, caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predisposing")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

- Gene fusion events

Gene fusion events involving highly abundant transcripts and cancer genes (red and blue sections in Gene fusion events table) were screened for suitable drugs.

Highly abundant

Predictive

Evidence pertaining to a variant’s effect on therapeutic response.

##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(deduped.result$geneA.name)[deduped.result$geneA.name]
genesB <- levels(deduped.result$geneB.name)[deduped.result$geneB.name]

civicDrugTable(c(genesA, genesB), caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predictive")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Prognostic

Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.

##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(deduped.result$geneA.name)[deduped.result$geneA.name]
genesB <- levels(deduped.result$geneB.name)[deduped.result$geneB.name]

civicDrugTable(c(genesA, genesB), caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Prognostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Diagnostic

Evidence pertaining to a variant’s impact on patient diagnosis.

##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(deduped.result$geneA.name)[deduped.result$geneA.name]
genesB <- levels(deduped.result$geneB.name)[deduped.result$geneB.name]

civicDrugTable(c(genesA, genesB), caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Diagnostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Predisposing

Evidence pertains to a variant’s role in conferring susceptibility to a disease.

##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(deduped.result$geneA.name)[deduped.result$geneA.name]
genesB <- levels(deduped.result$geneB.name)[deduped.result$geneB.name]

civicDrugTable(c(genesA, genesB), caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predisposing")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Containing cancer genes

Predictive

Evidence pertaining to a variant’s effect on therapeutic response.

##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(result.cancer_genes$geneA.name)[result.cancer_genes$geneA.name]
genesB <- levels(result.cancer_genes$geneB.name)[result.cancer_genes$geneB.name]

civicDrugTable(c(genesA, genesB), caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predictive")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Evidence Type

  • Predictive: Evidence pertaining to a variant’s effect on therapeutic response
  • Diagnostic: Evidence pertaining to a variant’s impact on patient diagnosis
  • Prognostic: Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival
  • Predisposing: Evidence pertains to a variant’s role in conferring susceptibility to a disease

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Prognostic

Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival.

##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(result.cancer_genes$geneA.name)[result.cancer_genes$geneA.name]
genesB <- levels(result.cancer_genes$geneB.name)[result.cancer_genes$geneB.name]

civicDrugTable(c(genesA, genesB), caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Prognostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Evidence Type

  • Predictive: Evidence pertaining to a variant’s effect on therapeutic response
  • Diagnostic: Evidence pertaining to a variant’s impact on patient diagnosis
  • Prognostic: Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival
  • Predisposing: Evidence pertains to a variant’s role in conferring susceptibility to a disease

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Diagnostic

Evidence pertaining to a variant’s impact on patient diagnosis.

##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(result.cancer_genes$geneA.name)[result.cancer_genes$geneA.name]
genesB <- levels(result.cancer_genes$geneB.name)[result.cancer_genes$geneB.name]

civicDrugTable(c(genesA, genesB), caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Diagnostic")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Evidence Type

  • Predictive: Evidence pertaining to a variant’s effect on therapeutic response
  • Diagnostic: Evidence pertaining to a variant’s impact on patient diagnosis
  • Prognostic: Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival
  • Predisposing: Evidence pertains to a variant’s role in conferring susceptibility to a disease

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Predisposing

Evidence pertains to a variant’s role in conferring susceptibility to a disease.

##### Generate table with drugs targeting fusion genes
data <- ref_datasets.list[[tissue]][["combined_data_processed"]]
genesA <- levels(result.cancer_genes$geneA.name)[result.cancer_genes$geneA.name]
genesB <- levels(result.cancer_genes$geneB.name)[result.cancer_genes$geneB.name]

civicDrugTable(c(genesA, genesB), caner_genes_annot.list[["civic_var_summaries"]], caner_genes_annot.list[["civic_clin_evid"]], "Predisposing")[[1]]

Table legend Evidence Level

  • A - Validated association: Proven/consensus association in human medicine
  • B - Clinical evidence: Clinical trial or other primary patient data supports association
  • C - Case study: Individual case reports from clinical journals
  • D - Preclinical evidence: In vivo or in vitro models support association
  • E - Inferential association: Indirect evidence

Evidence Type

  • Predictive: Evidence pertaining to a variant’s effect on therapeutic response
  • Diagnostic: Evidence pertaining to a variant’s impact on patient diagnosis
  • Prognostic: Evidence pertaining to a variant’s impact on disease progression, severity, or patient survival
  • Predisposing: Evidence pertains to a variant’s role in conferring susceptibility to a disease

Trust Rating

  • 5: Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered
  • 4: Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning
  • 3: Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning
  • 2: Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing
  • 1: Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims

Actionability Score

  • CIViC Actionability Score allows to assess the accumulation of evidence for each variant. It is calculated by adding all Evidence Item Scores for each variant. The Evidence Item Score is calculated by multiplying the evidence level (A=10 points, B=5 points, C=3 points, D=1 point, E=0.25 points) by the trust rating (Each Star = 1 point).

Addendum

for ( i in 1:length(params) ) {

  cat(paste("Parameter: ", names(params)[i], "\nValue: ", paste(unlist(params[i]), collapse = ","), "\n\n", sep=""))
}
Parameter: ref_data_dir
Value: ../data

Parameter: genes_cancer
Value: genes/cancer_genes.20181112.genes

Parameter: genes_immune
Value: genes/Genes_immune.txt

Parameter: oncokb_genes
Value: OncoKB/CancerGenesList.txt

Parameter: oncokb_clin_vars
Value: OncoKB/allActionableVariants.txt

Parameter: oncokb_all_vars
Value: OncoKB/allAnnotatedVariants.txt

Parameter: civic_var_summaries
Value: CIViC/01-Oct-2018-VariantSummaries.tsv

Parameter: civic_clin_evid
Value: CIViC/01-Oct-2018-ClinicalEvidenceSummaries.tsv

Parameter: cancer_biomarkers_trans
Value: cancer_biomarkers_database/cancer_genes_upon_trans.tsv

Parameter: cn_loss
Value: 1

Parameter: cn_gain
Value: 4

Parameter: ensembl_version
Value: 86

Parameter: ucsc_genome_assembly
Value: 19

Parameter: report_dir
Value: /Users/jmarzec/data/Patients/WTS/CCR180038_SV18T002P006_RNA/RNAseq_report

Parameter: sample_name
Value: CCR180038_SV18T002P006_RNA

Parameter: tissue
Value: pancreas

Parameter: plots_mode
Value: static

Parameter: count_file
Value: /Users/jmarzec/data/Patients/WTS/CCR180038_SV18T002P006_RNA/CCR180038_SV18T002P006_RNA-ready.counts
devtools::session_info()
─ Session info ──────────────────────────────────────────────────────────
 setting  value                       
 version  R version 3.5.0 (2018-04-23)
 os       macOS  10.14.2              
 system   x86_64, darwin15.6.0        
 ui       X11                         
 language (EN)                        
 collate  en_AU.UTF-8                 
 ctype    en_AU.UTF-8                 
 tz       Australia/Melbourne         
 date     2019-01-11                  

─ Packages ──────────────────────────────────────────────────────────────
 package                     * version    date       lib
 AnnotationDbi               * 1.44.0     2018-10-30 [1]
 AnnotationFilter            * 1.6.0      2018-10-30 [1]
 assertthat                    0.2.0      2017-04-11 [1]
 backports                     1.1.3      2018-12-14 [1]
 bindr                         0.1.1      2018-03-13 [1]
 bindrcpp                    * 0.2.2      2018-03-29 [1]
 Biobase                     * 2.42.0     2018-10-30 [1]
 BiocGenerics                * 0.28.0     2018-10-30 [1]
 BiocParallel                  1.16.5     2019-01-05 [1]
 biomaRt                       2.38.0     2018-10-30 [1]
 Biostrings                  * 2.50.2     2019-01-03 [1]
 bit                           1.1-14     2018-05-29 [1]
 bit64                         0.9-7      2017-05-08 [1]
 bitops                        1.0-6      2013-08-17 [1]
 blob                          1.1.1      2018-03-25 [1]
 broom                         0.5.1      2018-12-05 [1]
 BSgenome                    * 1.50.0     2018-10-30 [1]
 BSgenome.Hsapiens.UCSC.hg19 * 1.4.0      2019-01-10 [1]
 callr                         3.1.1      2018-12-21 [1]
 cellranger                    1.1.0      2016-07-27 [1]
 cli                           1.0.1      2018-09-25 [1]
 colorspace                    1.3-2      2016-12-14 [1]
 crayon                        1.3.4      2017-09-16 [1]
 crosstalk                     1.0.0      2016-12-21 [1]
 curl                          3.2        2018-03-28 [1]
 data.table                    1.11.8     2018-09-30 [1]
 DBI                           1.0.0      2018-05-02 [1]
 DelayedArray                  0.8.0      2018-10-30 [1]
 desc                          1.2.0      2018-05-01 [1]
 devtools                      2.0.1      2018-10-26 [1]
 digest                        0.6.18     2018-10-10 [1]
 dplyr                       * 0.7.8      2018-11-10 [1]
 DT                            0.5        2018-11-05 [1]
 edgeR                       * 3.24.3     2019-01-02 [1]
 EnsDb.Hsapiens.v86          * 2.99.0     2018-10-22 [1]
 ensembldb                   * 2.6.3      2018-11-21 [1]
 evaluate                      0.12       2018-10-09 [1]
 farver                        1.1.0      2018-11-20 [1]
 forcats                     * 0.3.0      2018-02-19 [1]
 fs                            1.2.6      2018-08-23 [1]
 generics                      0.0.2      2018-11-29 [1]
 GenomeInfoDb                * 1.18.1     2018-11-12 [1]
 GenomeInfoDbData              1.2.0      2018-11-13 [1]
 GenomicAlignments             1.18.1     2019-01-04 [1]
 GenomicFeatures             * 1.34.1     2018-11-03 [1]
 GenomicRanges               * 1.34.0     2018-10-30 [1]
 getopt                        1.20.2     2018-02-16 [1]
 ggforce                     * 0.1.3      2018-07-07 [1]
 ggplot2                     * 3.1.0      2018-10-25 [1]
 glue                          1.3.0      2018-07-17 [1]
 gtable                        0.2.0      2016-02-26 [1]
 haven                         2.0.0      2018-11-22 [1]
 hms                           0.4.2      2018-03-10 [1]
 htmltools                     0.3.6      2017-04-28 [1]
 htmlwidgets                   1.3        2018-09-30 [1]
 httpuv                        1.4.5.1    2018-12-18 [1]
 httr                          1.4.0      2018-12-11 [1]
 IRanges                     * 2.16.0     2018-10-30 [1]
 jsonlite                      1.6        2018-12-07 [1]
 kableExtra                  * 0.9.0      2018-05-21 [1]
 knitr                       * 1.21       2018-12-10 [1]
 labeling                      0.3        2014-08-23 [1]
 later                         0.7.5      2018-09-18 [1]
 lattice                       0.20-38    2018-11-04 [1]
 lazyeval                      0.2.1      2017-10-29 [1]
 limma                       * 3.38.3     2018-12-02 [1]
 locfit                        1.5-9.1    2013-04-20 [1]
 lubridate                     1.7.4      2018-04-11 [1]
 magick                      * 2.0        2018-10-05 [1]
 magrittr                      1.5        2014-11-22 [1]
 MASS                          7.3-51.1   2018-11-01 [1]
 Matrix                      * 1.2-15     2018-11-01 [1]
 matrixStats                 * 0.54.0     2018-07-23 [1]
 memoise                       1.1.0      2017-04-21 [1]
 mime                          0.6        2018-10-05 [1]
 modelr                        0.1.2      2018-05-11 [1]
 munsell                       0.5.0      2018-06-12 [1]
 nlme                          3.1-137    2018-04-07 [1]
 NOISeq                      * 2.26.1     2019-01-04 [1]
 optparse                    * 1.6.0      2018-06-17 [1]
 pander                        0.6.3      2018-11-06 [1]
 pdftools                      2.0        2018-12-11 [1]
 pillar                        1.3.1      2018-12-15 [1]
 pkgbuild                      1.0.2      2018-10-16 [1]
 pkgconfig                     2.0.2      2018-08-16 [1]
 pkgload                       1.0.2      2018-10-29 [1]
 plotly                        4.8.0.9000 2019-01-04 [1]
 plyr                          1.8.4      2016-06-08 [1]
 png                           0.1-7      2013-12-03 [1]
 preprocessCore              * 1.44.0     2018-10-30 [1]
 prettyunits                   1.0.2      2015-07-13 [1]
 processx                      3.2.1      2018-12-05 [1]
 progress                      1.2.0      2018-06-14 [1]
 promises                      1.0.1      2018-04-13 [1]
 ProtGenerics                  1.14.0     2018-10-30 [1]
 ps                            1.3.0      2018-12-21 [1]
 purrr                       * 0.2.5      2018-05-29 [1]
 R6                            2.3.0      2018-10-04 [1]
 rapportools                 * 1.0        2014-01-07 [1]
 RColorBrewer                  1.1-2      2014-12-07 [1]
 Rcpp                          1.0.0      2018-11-07 [1]
 RCurl                         1.95-4.11  2018-07-15 [1]
 readr                       * 1.3.1      2018-12-21 [1]
 readxl                        1.2.0      2018-12-19 [1]
 remotes                       2.0.2      2018-10-30 [1]
 reshape                     * 0.8.8      2018-10-23 [1]
 rlang                         0.3.1      2019-01-08 [1]
 rmarkdown                     1.11       2018-12-08 [1]
 rprojroot                     1.3-2      2018-01-03 [1]
 Rsamtools                     1.34.0     2018-10-30 [1]
 RSQLite                       2.1.1      2018-05-06 [1]
 rstudioapi                    0.9.0      2019-01-09 [1]
 rtracklayer                 * 1.42.1     2018-11-21 [1]
 rvest                         0.3.2      2016-06-17 [1]
 S4Vectors                   * 0.20.1     2018-11-09 [1]
 scales                        1.0.0      2018-08-09 [1]
 sessioninfo                   1.1.1      2018-11-05 [1]
 shiny                         1.2.0      2018-11-02 [1]
 stringi                       1.2.4      2018-07-20 [1]
 stringr                     * 1.3.1      2018-05-10 [1]
 SummarizedExperiment          1.12.0     2018-10-30 [1]
 testthat                      2.0.1      2018-10-13 [1]
 tibble                      * 2.0.0      2019-01-04 [1]
 tidyr                       * 0.8.2      2018-10-28 [1]
 tidyselect                    0.2.5      2018-10-11 [1]
 tidyverse                   * 1.2.1      2017-11-14 [1]
 tweenr                        1.0.1      2018-12-14 [1]
 units                         0.6-2      2018-12-05 [1]
 usethis                       1.4.0      2018-08-14 [1]
 viridisLite                   0.3.0      2018-02-01 [1]
 withr                         2.1.2      2018-03-15 [1]
 xfun                          0.4        2018-10-23 [1]
 XML                           3.98-1.16  2018-08-19 [1]
 xml2                          1.2.0      2018-01-24 [1]
 xtable                        1.8-3      2018-08-29 [1]
 XVector                     * 0.22.0     2018-10-30 [1]
 yaml                          2.2.0      2018-07-25 [1]
 zlibbioc                      1.28.0     2018-10-30 [1]
 source                          
 Bioconductor                    
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 Bioconductor                    
 Bioconductor                    
 Bioconductor                    
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 Bioconductor                    
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 Bioconductor                    
 Bioconductor                    
 Bioconductor                    
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Github (ropensci/plotly@c6b8a5d)
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.2)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 CRAN (R 3.5.0)                  
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.2)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 CRAN (R 3.5.0)                  
 Bioconductor                    
 CRAN (R 3.5.0)                  
 Bioconductor                    

[1] /Library/Frameworks/R.framework/Versions/3.5/Resources/library